From 2dcd5e4f4b9bab956b9ab679cc1b3ff2c15c85b5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 4 Feb 2023 14:13:14 +0100 Subject: [PATCH 01/63] Expand runtime lookups in a late phase --- src/coreclr/jit/compiler.cpp | 3 + src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/flowgraph.cpp | 39 ++++ src/coreclr/jit/gentree.h | 5 - src/coreclr/jit/importer.cpp | 61 +----- src/coreclr/jit/indirectcalltransformer.cpp | 217 +------------------- 7 files changed, 50 insertions(+), 279 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1104aabaf1c869..3459fba7f43028 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4918,6 +4918,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl fgDomsComputed = false; optLoopTableValid = false; + // Expand runtime lookups (an optimization but we'd better run it in tier0 too) + DoPhase(this, PHASE_EXPAND_RTLOOKUPS, &Compiler::fgExpandRuntimeLookups); + // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 45dbbec2eb2674..45a4d8e7f145d6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5222,6 +5222,7 @@ class Compiler // Initialize the per-block variable sets (used for liveness analysis). 
void fgInitBlockVarSets(); + PhaseStatus fgExpandRuntimeLookups(); PhaseStatus fgInsertGCPolls(); BasicBlock* fgCreateGCPoll(GCPollType pollType, BasicBlock* block); @@ -7025,8 +7026,6 @@ class Compiler optMethodFlags &= ~OMF_HAS_EXPRUNTIMELOOKUP; } - void addExpRuntimeLookupCandidate(GenTreeCall* call); - bool doesMethodHavePatchpoints() { return (optMethodFlags & OMF_HAS_PATCHPOINT) != 0; diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 4d9d39cca25cf9..0d2a4ee2170817 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -92,6 +92,7 @@ CompPhaseNameMacro(PHASE_IF_CONVERSION, "If conversion", CompPhaseNameMacro(PHASE_VN_BASED_DEAD_STORE_REMOVAL,"VN-based dead store removal", false, -1, false) CompPhaseNameMacro(PHASE_OPT_UPDATE_FLOW_GRAPH, "Update flow graph opt pass", false, -1, false) CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2, false)",false, -1, false) +CompPhaseNameMacro(PHASE_EXPAND_RTLOOKUPS, "Expand runtime lookups", false, -1, true) CompPhaseNameMacro(PHASE_INSERT_GC_POLLS, "Insert GC Polls", false, -1, true) CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", false, -1, true) CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", false, -1, false) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 66c3601013bb2d..3cdd42dee792fa 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -52,6 +52,45 @@ static bool blockNeedsGCPoll(BasicBlock* block) return blockMayNeedGCPoll; } +//------------------------------------------------------------------------------ +// fgExpandRuntimeLookups : partially expand runtime lookups helper calls +// to add a nullcheck [+ size check] and a fast path +// Returns: +// PhaseStatus indicating what, if anything, was changed. 
+// +PhaseStatus Compiler::fgExpandRuntimeLookups() +{ + PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; + if (!doesMethodHaveExpRuntimeLookup()) + { + return result; + } + + for (BasicBlock* const block : Blocks()) + { + for (Statement* const stmt : block->Statements()) + { + for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) + { + if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) + { + continue; + } + assert(tree->IsHelperCall()); + + // TODO: expand runtime lookups into: + // + // isNull ? helperCall : IND(fastpath) + // + // or (for dynamic expansion): + // + // isNull ? helperCall : (sizeCheck ? IND(fastPath) : helperCall) + } + } + } + return result; +} + //------------------------------------------------------------------------------ // fgInsertGCPolls : Insert GC polls for basic blocks containing calls to methods // with SuppressGCTransitionAttribute. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 1e3f87248589d7..db8c25154ff548 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5520,11 +5520,6 @@ struct GenTreeCall final : public GenTree gtCallMoreFlags |= GTF_CALL_M_EXP_RUNTIME_LOOKUP; } - void ClearExpRuntimeLookup() - { - gtCallMoreFlags &= ~GTF_CALL_M_EXP_RUNTIME_LOOKUP; - } - bool IsExpRuntimeLookup() const { return (gtCallMoreFlags & GTF_CALL_M_EXP_RUNTIME_LOOKUP) != 0; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 53e510410b3a0b..8b1d02df4c2806 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1881,62 +1881,17 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken assert(pRuntimeLookup->indirections != 0); - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1")); - - // Extract the handle - GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; - // Call the helper // - 
Setup argNode with the pointer to the signature returned by the lookup GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - // Check for null and possibly call helper - GenTree* nullCheck = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); - GenTree* handleForResult = gtCloneExpr(handleForNullCheck); - - GenTree* result = nullptr; - - if (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) - { - // Dynamic dictionary expansion support - - assert((lastIndOfTree != nullptr) && (pRuntimeLookup->indirections > 0)); - - // sizeValue = dictionary[pRuntimeLookup->sizeOffset] - GenTreeIntCon* sizeOffset = gtNewIconNode(pRuntimeLookup->sizeOffset, TYP_I_IMPL); - GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); - GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); - sizeValue->gtFlags |= GTF_IND_NONFAULTING; - - // sizeCheck fails if sizeValue < pRuntimeLookup->offsets[i] - GenTree* offsetValue = gtNewIconNode(pRuntimeLookup->offsets[pRuntimeLookup->indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); - - // revert null check condition. - nullCheck->ChangeOperUnchecked(GT_EQ); - - // ((sizeCheck fails || nullCheck fails))) ? (helperCall : handle). - // Add checks and the handle as call arguments, indirect call transformer will handle this. 
- NewCallArg nullCheckArg = NewCallArg::Primitive(nullCheck); - NewCallArg sizeCheckArg = NewCallArg::Primitive(sizeCheck); - NewCallArg handleForResultArg = NewCallArg::Primitive(handleForResult); - helperCall->gtArgs.PushFront(this, nullCheckArg, sizeCheckArg, handleForResultArg); - result = helperCall; - addExpRuntimeLookupCandidate(helperCall); - } - else - { - GenTreeColon* colonNullCheck = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, handleForResult, helperCall); - result = gtNewQmarkNode(TYP_I_IMPL, nullCheck, colonNullCheck); - } - - unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling Runtime Lookup tree")); + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + setMethodHasExpRuntimeLookup(); - impAssignTempGen(tmp, result, CHECK_SPILL_NONE); - return gtNewLclvNode(tmp, TYP_I_IMPL); + helperCall->SetExpRuntimeLookup(); + return helperCall; } struct RecursiveGuard @@ -14062,12 +14017,6 @@ methodPointerInfo* Compiler::impAllocateMethodPointerInfo(const CORINFO_RESOLVED return memory; } -void Compiler::addExpRuntimeLookupCandidate(GenTreeCall* call) -{ - setMethodHasExpRuntimeLookup(); - call->SetExpRuntimeLookup(); -} - //------------------------------------------------------------------------ // impIsClassExact: check if a class handle can only describe values // of exactly one class. 
diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 1d21937119c11c..e9c6d5b27fa811 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -115,12 +115,6 @@ class IndirectCallTransformer transformer.Run(); count++; } - else if (compiler->doesMethodHaveExpRuntimeLookup() && ContainsExpRuntimeLookup(stmt)) - { - ExpRuntimeLookupTransformer transformer(compiler, block, stmt); - transformer.Run(); - count++; - } } return count; @@ -159,28 +153,6 @@ class IndirectCallTransformer return candidate->IsCall() && candidate->AsCall()->IsGuardedDevirtualizationCandidate(); } - //------------------------------------------------------------------------ - // ContainsExpRuntimeLookup: check if this statement contains a dictionary - // with dynamic dictionary expansion that we want to transform in CFG. - // - // Return Value: - // true if contains, false otherwise. - // - bool ContainsExpRuntimeLookup(Statement* stmt) - { - GenTree* candidate = stmt->GetRootNode(); - if (candidate->OperIs(GT_ASG)) - { - candidate = candidate->gtGetOp2(); - } - if (candidate->OperIs(GT_CALL)) - { - GenTreeCall* call = candidate->AsCall(); - return call->IsExpRuntimeLookup(); - } - return false; - } - class Transformer { public: @@ -1221,191 +1193,6 @@ class IndirectCallTransformer } }; - // Runtime lookup with dynamic dictionary expansion transformer, - // it expects helper runtime lookup call with additional arguments that are: - // result handle, nullCheck tree, sizeCheck tree. - // before: - // current block - // { - // previous statements - // transforming statement - // { - // ASG lclVar, call with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag set and additional arguments. 
- // } - // subsequent statements - // } - // - // after: - // current block - // { - // previous statements - // } BBJ_NONE check block - // check block - // { - // jump to else if the handle fails size check - // } BBJ_COND check block2, else block - // check block2 - // { - // jump to else if the handle fails null check - // } BBJ_COND then block, else block - // then block - // { - // return handle - // } BBJ_ALWAYS remainder block - // else block - // { - // do a helper call - // } BBJ_NONE remainder block - // remainder block - // { - // subsequent statements - // } - // - class ExpRuntimeLookupTransformer final : public Transformer - { - public: - ExpRuntimeLookupTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) - : Transformer(compiler, block, stmt) - { - GenTreeOp* asg = stmt->GetRootNode()->AsOp(); - resultLclNum = asg->gtOp1->AsLclVar()->GetLclNum(); - origCall = GetCall(stmt); - checkBlock2 = nullptr; - } - - protected: - virtual const char* Name() override - { - return "ExpRuntimeLookup"; - } - - //------------------------------------------------------------------------ - // GetCall: find a call in a statement. - // - // Arguments: - // callStmt - the statement with the call inside. - // - // Return Value: - // call tree node pointer. - virtual GenTreeCall* GetCall(Statement* callStmt) override - { - GenTree* tree = callStmt->GetRootNode(); - assert(tree->OperIs(GT_ASG)); - GenTreeCall* call = tree->gtGetOp2()->AsCall(); - return call; - } - - //------------------------------------------------------------------------ - // ClearFlag: clear runtime exp lookup flag from the original call. - // - virtual void ClearFlag() override - { - origCall->ClearExpRuntimeLookup(); - } - - // FixupRetExpr: no action needed. - virtual void FixupRetExpr() override - { - } - - //------------------------------------------------------------------------ - // CreateCheck: create check blocks, that checks dictionary size and does null test. 
- // - virtual void CreateCheck() override - { - CallArg* nullCheck = origCall->gtArgs.GetArgByIndex(0); - CallArg* sizeCheck = origCall->gtArgs.GetArgByIndex(1); - origCall->gtArgs.Remove(nullCheck); - origCall->gtArgs.Remove(sizeCheck); - // The first argument is the handle now. - checkBlock = CreateAndInsertBasicBlock(BBJ_COND, currBlock); - - assert(sizeCheck->GetEarlyNode()->OperIs(GT_LE)); - GenTree* sizeJmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck->GetNode()); - Statement* sizeJmpStmt = compiler->fgNewStmtFromTree(sizeJmpTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, sizeJmpStmt); - - checkBlock2 = CreateAndInsertBasicBlock(BBJ_COND, checkBlock); - assert(nullCheck->GetEarlyNode()->OperIs(GT_EQ)); - GenTree* nullJmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, nullCheck->GetNode()); - Statement* nullJmpStmt = compiler->fgNewStmtFromTree(nullJmpTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock2, nullJmpStmt); - } - - //------------------------------------------------------------------------ - // CreateThen: create then block, that is executed if the checks succeed. - // This simply returns the handle. - // - virtual void CreateThen() override - { - thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock2); - - CallArg* resultHandle = origCall->gtArgs.GetArgByIndex(0); - // The first argument is the real first argument for the call now. - origCall->gtArgs.Remove(resultHandle); - - GenTree* asg = compiler->gtNewTempAssign(resultLclNum, resultHandle->GetNode()); - Statement* asgStmt = compiler->gtNewStmt(asg, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(thenBlock, asgStmt); - } - - //------------------------------------------------------------------------ - // CreateElse: create else block, that is executed if the checks fail. 
- // - virtual void CreateElse() override - { - elseBlock = CreateAndInsertBasicBlock(BBJ_NONE, thenBlock); - GenTree* asg = compiler->gtNewTempAssign(resultLclNum, origCall); - Statement* asgStmt = compiler->gtNewStmt(asg, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(elseBlock, asgStmt); - } - - //------------------------------------------------------------------------ - // SetWeights: set weights for new blocks. - // - virtual void SetWeights() override - { - remainderBlock->inheritWeight(currBlock); - checkBlock->inheritWeight(currBlock); - checkBlock2->inheritWeightPercentage(checkBlock, HIGH_PROBABILITY); - thenBlock->inheritWeightPercentage(currBlock, HIGH_PROBABILITY); - elseBlock->inheritWeightPercentage(currBlock, 100 - HIGH_PROBABILITY); - } - - //------------------------------------------------------------------------ - // ChainFlow: link new blocks into correct cfg. - // - virtual void ChainFlow() override - { - assert(compiler->fgPredsComputed); - - // currBlock - compiler->fgRemoveRefPred(remainderBlock, currBlock); - compiler->fgAddRefPred(checkBlock, currBlock); - - // checkBlock - checkBlock->bbJumpDest = elseBlock; - compiler->fgAddRefPred(elseBlock, checkBlock); - compiler->fgAddRefPred(checkBlock2, checkBlock); - - // checkBlock2 - checkBlock2->bbJumpDest = elseBlock; - compiler->fgAddRefPred(elseBlock, checkBlock2); - compiler->fgAddRefPred(thenBlock, checkBlock2); - - // thenBlock - thenBlock->bbJumpDest = remainderBlock; - compiler->fgAddRefPred(remainderBlock, thenBlock); - - // elseBlock - compiler->fgAddRefPred(remainderBlock, elseBlock); - } - - private: - BasicBlock* checkBlock2; - unsigned resultLclNum; - }; - Compiler* compiler; }; @@ -1424,7 +1211,6 @@ Compiler::fgWalkResult Compiler::fgDebugCheckForTransformableIndirectCalls(GenTr GenTreeCall* call = tree->AsCall(); assert(!call->IsFatPointerCandidate()); assert(!call->IsGuardedDevirtualizationCandidate()); - assert(!call->IsExpRuntimeLookup()); } return WALK_CONTINUE; } @@ 
-1436,7 +1222,6 @@ Compiler::fgWalkResult Compiler::fgDebugCheckForTransformableIndirectCalls(GenTr void Compiler::CheckNoTransformableIndirectCallsRemain() { assert(!doesMethodHaveFatPointer()); - assert(!doesMethodHaveExpRuntimeLookup()); for (BasicBlock* const block : Blocks()) { @@ -1460,7 +1245,7 @@ void Compiler::CheckNoTransformableIndirectCallsRemain() PhaseStatus Compiler::fgTransformIndirectCalls() { int count = 0; - if (doesMethodHaveFatPointer() || doesMethodHaveGuardedDevirtualization() || doesMethodHaveExpRuntimeLookup()) + if (doesMethodHaveFatPointer() || doesMethodHaveGuardedDevirtualization()) { IndirectCallTransformer indirectCallTransformer(this); count = indirectCallTransformer.Run(); From 2c7a4a3c03deb70d6f5614a06a2a0e1552ff45bf Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 4 Feb 2023 16:31:32 +0100 Subject: [PATCH 02/63] Clean up --- src/coreclr/jit/importer.cpp | 95 ++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 8b1d02df4c2806..4ae8bde5857c8e 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1782,6 +1782,24 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken return gtNewRuntimeLookupHelperCallNode(pRuntimeLookup, ctxTree, compileTimeHandle); } + if (pRuntimeLookup->testForNull) + { + assert(pRuntimeLookup->indirections != 0); + + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling sideEffects")); + + GenTree* argNode = + gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); + GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); + + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
+ // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + setMethodHasExpRuntimeLookup(); + + helperCall->SetExpRuntimeLookup(); + return helperCall; + } + // Slot pointer GenTree* slotPtrTree = ctxTree; @@ -1836,62 +1854,45 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken } // No null test required - if (!pRuntimeLookup->testForNull) + if (pRuntimeLookup->indirections == 0) { - if (pRuntimeLookup->indirections == 0) - { - return slotPtrTree; - } - - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - - if (!pRuntimeLookup->testForFixup) - { - return slotPtrTree; - } - - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); - - unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); - impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); + return slotPtrTree; + } - GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - // downcast the pointer to a TYP_INT on 64-bit targets - slot = impImplicitIorI4Cast(slot, TYP_INT); - // Use a GT_AND to check for the lowest bit and indirect if it is set - GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); - GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - // slot = GT_IND(slot - 1) - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); - GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); - indir->gtFlags |= GTF_IND_NONFAULTING; - indir->gtFlags |= GTF_IND_INVARIANT; + if (!pRuntimeLookup->testForFixup) + { + return slotPtrTree; + } - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* asg = gtNewAssignNode(slot, indir); - GenTreeColon* colon = new (this, GT_COLON) 
GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); - GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); - impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); - return gtNewLclvNode(slotLclNum, TYP_I_IMPL); - } + unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); + impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); - assert(pRuntimeLookup->indirections != 0); + GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + // downcast the pointer to a TYP_INT on 64-bit targets + slot = impImplicitIorI4Cast(slot, TYP_INT); + // Use a GT_AND to check for the lowest bit and indirect if it is set + GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); + GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); - // Call the helper - // - Setup argNode with the pointer to the signature returned by the lookup - GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); + // slot = GT_IND(slot - 1) + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); + GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + indir->gtFlags |= GTF_IND_NONFAULTING; + indir->gtFlags |= GTF_IND_INVARIANT; - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - setMethodHasExpRuntimeLookup(); + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* asg = gtNewAssignNode(slot, indir); + GenTreeColon* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); + GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); + impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); - helperCall->SetExpRuntimeLookup(); - return helperCall; + return gtNewLclvNode(slotLclNum, TYP_I_IMPL); } struct RecursiveGuard From dca8140839ada87319f85725838ddb28e96887fc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 6 Feb 2023 12:57:21 +0100 Subject: [PATCH 03/63] test --- src/coreclr/jit/utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index bf5181be76a0a1..12b6297aff2953 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1348,6 +1348,7 @@ void HelperCallProperties::init() isPure = true; noThrow = true; nonNullReturn = true; + mutatesHeap = true; break; // type casting helpers From e3038696de3fef071271939280d26d4e395ac3f4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 6 Feb 2023 13:04:19 +0100 Subject: [PATCH 04/63] test2 --- src/coreclr/jit/utils.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 12b6297aff2953..43ac023056f4c2 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1345,10 +1345,9 @@ void HelperCallProperties::init() case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: // logging helpers are not technically pure but can be optimized away - isPure = true; + isPure = false; noThrow = true; nonNullReturn = true; - mutatesHeap = true; break; // type casting helpers From 20e79dc022fd6509407a1f23f83d61c5fa1f11ea Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 11:31:12 +0100 
Subject: [PATCH 05/63] Test --- src/coreclr/jit/utils.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index f1ca33dc6c052c..4c75d372b39ac4 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1346,8 +1346,9 @@ void HelperCallProperties::init() case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: // logging helpers are not technically pure but can be optimized away isPure = false; - noThrow = true; - nonNullReturn = true; + noThrow = false; + nonNullReturn = false; + mutatesHeap = true; break; // type casting helpers From 22f34a1aedd63281dc610756906c3efc3bd192bc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 11:52:57 +0100 Subject: [PATCH 06/63] Enable JitDasmWithAlignmentBoundaries in Release --- src/coreclr/jit/compiler.cpp | 30 ++++++++++++++---------------- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/emit.cpp | 11 +++-------- src/coreclr/jit/jitconfigvalues.h | 19 ++++++------------- 4 files changed, 28 insertions(+), 42 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 20b2030c7d7f6b..1130414704cd2c 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2828,17 +2828,17 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitEarlyExpandMDArrays = (JitConfig.JitEarlyExpandMDArrays() != 0); - opts.disAsm = false; - opts.disDiffable = false; - opts.dspDiffable = false; + opts.disAsm = false; + opts.disDiffable = false; + opts.dspDiffable = false; + opts.disAlignment = false; + opts.disAddr = false; #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; opts.varNames = false; opts.dmpHex = false; opts.disAsmSpilled = false; - opts.disAddr = false; - opts.disAlignment = false; opts.dspCode = false; opts.dspEHTable = false; opts.dspDebugInfo = false; @@ -2939,17 +2939,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.dspDiffable = true; } - // This one 
applies to both Ngen/Jit Disasm output: COMPlus_JitDasmWithAddress=1 - if (JitConfig.JitDasmWithAddress() != 0) - { - opts.disAddr = true; - } - - if (JitConfig.JitDasmWithAlignmentBoundaries() != 0) - { - opts.disAlignment = true; - } - if (JitConfig.JitLongAddress() != 0) { opts.compLongAddress = true; @@ -3043,6 +3032,15 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.disAsm = true; } #endif // !DEBUG + // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDasmWithAddress=1 + if (JitConfig.JitDasmWithAddress() != 0) + { + opts.disAddr = true; + } + if (JitConfig.JitDasmWithAlignmentBoundaries() != 0) + { + opts.disAlignment = true; + } //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4ad0ab6b48e848..5b7acf123980a5 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9452,9 +9452,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool optRepeat; // Repeat optimizer phases k times #endif - bool disAsm; // Display native code as it is generated - bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable) - bool disDiffable; // Makes the Disassembly code 'diff-able' + bool disAsm; // Display native code as it is generated + bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable) + bool disDiffable; // Makes the Disassembly code 'diff-able' + bool disAlignment; // Display alignment boundaries in disassembly code + bool disAddr; // Display process address next to each instruction in disassembly code #ifdef DEBUG bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH bool dspCode; // Display native code generated @@ -9466,8 +9468,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool varNames; // Display variables names in native 
code output bool disAsmSpilled; // Display native code when any register spilling occurs bool disasmWithGC; // Display GC info interleaved with disassembly. - bool disAddr; // Display process address next to each instruction in disassembly code - bool disAlignment; // Display alignment boundaries in disassembly code bool disAsm2; // Display native code after it is generated using external disassembler bool dspOrder; // Display names of each of the methods that we ngen/jit bool dspUnwind; // Display the unwind info output diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 6b230993aa692f..752d8454e0f2fa 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1445,12 +1445,10 @@ void emitter::appendToCurIG(instrDesc* id) void emitter::emitDispInsAddr(BYTE* code) { -#ifdef DEBUG if (emitComp->opts.disAddr) { printf(FMT_ADDR, DBG_ADDR(code)); } -#endif } void emitter::emitDispInsOffs(unsigned offs, bool doffs) @@ -7076,9 +7074,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, for (unsigned cnt = ig->igInsCnt; cnt > 0; cnt--) { -#ifdef DEBUG size_t curInstrAddr = (size_t)cp; instrDesc* curInstrDesc = id; +#ifdef DEBUG if ((emitComp->opts.disAsm || emitComp->verbose) && (JitConfig.JitDisasmWithDebugInfo() != 0) && (id->idCodeSize() > 0)) @@ -7112,9 +7110,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, size_t insSize = emitIssue1Instr(ig, id, &cp); emitAdvanceInstrDesc(&id, insSize); -#ifdef DEBUG // Print the alignment boundary - if ((emitComp->opts.disAsm || emitComp->verbose) && (emitComp->opts.disAddr || emitComp->opts.disAlignment)) + if ((emitComp->opts.disAsm INDEBUG(|| emitComp->verbose)) && + (emitComp->opts.disAddr || emitComp->opts.disAlignment)) { size_t afterInstrAddr = (size_t)cp; instruction curIns = curInstrDesc->idIns(); @@ -7197,7 +7195,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } -#endif // DEBUG } #ifdef DEBUG @@ -8117,12 +8114,10 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* 
dst) for (dataSection* data = section->dsdList; data != nullptr; data = data->dsNext) { -#ifdef DEBUG if (emitComp->opts.disAddr) { printf("; @" FMT_ADDR "\n", DBG_ADDR(dst)); } -#endif const char* labelFormat = "%-7s"; char label[64]; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 4c44490bb35ed5..fbfea56f0b32b8 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -22,9 +22,7 @@ CONFIG_INTEGER(BreakOnDumpToken, W("BreakOnDumpToken"), 0xffffffff) // Breaks wh // particular token value. CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailure"), 0) // Halts the jit on // verification failure -CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able -CONFIG_INTEGER(JitDasmWithAddress, W("JitDasmWithAddress"), 0) // Print the process address next to each instruction of - // the disassembly +CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics CONFIG_INTEGER(DisplayLsraStats, W("JitLsraStats"), 0) // Display JIT Linear Scan Register Allocator statistics // If set to "1", display the stats in textual format. @@ -80,9 +78,6 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, W("JitUnrollLoopMaxIterationCount"), DEFAULT_UNROLL_LOOP_MAX_ITERATION_COUNT) -// Print the alignment boundaries in disassembly. 
-CONFIG_INTEGER(JitDasmWithAlignmentBoundaries, W("JitDasmWithAlignmentBoundaries"), 0) - CONFIG_INTEGER(JitDirectAlloc, W("JitDirectAlloc"), 0) CONFIG_INTEGER(JitDoubleAlign, W("JitDoubleAlign"), 1) CONFIG_INTEGER(JitDumpASCII, W("JitDumpASCII"), 1) // Uses only ASCII characters in tree dumps @@ -190,7 +185,6 @@ CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) -CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method CONFIG_STRING(JitDisasmAssemblies, W("JitDisasmAssemblies")) // Only show JitDisasm and related info for methods // from this semicolon-delimited list of assemblies. CONFIG_INTEGER(JitDisasmWithGC, W("JitDisasmWithGC"), 0) // Dump interleaved GC Info for any method disassembled. @@ -255,14 +249,13 @@ CONFIG_STRING(JitStressRange, W("JitStressRange")) // Internal Jit /// JIT Hardware Intrinsics /// CONFIG_INTEGER(EnableIncompleteISAClass, W("EnableIncompleteISAClass"), 0) // Enable testing not-yet-implemented - // intrinsic classes - -#else // defined(DEBUG) +#endif // defined(DEBUG) -// JitDisasm is supported in Release too CONFIG_METHODSET(JitDisasm, W("JitDisasm")) -#endif // !defined(DEBUG) - +// Print the process address next to each instruction of the disassembly +CONFIG_INTEGER(JitDasmWithAddress, W("JitDasmWithAddress"), 0) +// Print the alignment boundaries in disassembly. +CONFIG_INTEGER(JitDasmWithAlignmentBoundaries, W("JitDasmWithAlignmentBoundaries"), 0) CONFIG_INTEGER(JitDisasmSummary, W("JitDisasmSummary"), 0) // Prints all jitted methods to the console CONFIG_STRING(JitStdOutFile, W("JitStdOutFile")) // If set, sends JIT's stdout output to this file. 
From 2110ec05e3d06c9548f711cb475fb67dc0484171 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 19:05:22 +0100 Subject: [PATCH 07/63] clean up --- src/coreclr/jit/importer.cpp | 99 +++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index a058cf6b92cfcf..5395ed63a269c2 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1771,24 +1771,6 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken return gtNewRuntimeLookupHelperCallNode(pRuntimeLookup, ctxTree, compileTimeHandle); } - if (pRuntimeLookup->testForNull) - { - assert(pRuntimeLookup->indirections != 0); - - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling sideEffects")); - - GenTree* argNode = - gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - setMethodHasExpRuntimeLookup(); - - helperCall->SetExpRuntimeLookup(); - return helperCall; - } - // Slot pointer GenTree* slotPtrTree = ctxTree; @@ -1843,45 +1825,68 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken } // No null test required - if (pRuntimeLookup->indirections == 0) + if (!pRuntimeLookup->testForNull) { - return slotPtrTree; - } + if (pRuntimeLookup->indirections == 0) + { + return slotPtrTree; + } - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - if (!pRuntimeLookup->testForFixup) - { - return slotPtrTree; + if (!pRuntimeLookup->testForFixup) + { + return slotPtrTree; + } + + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); + + unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); + impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); + + GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + // downcast the pointer to a TYP_INT on 64-bit targets + slot = impImplicitIorI4Cast(slot, TYP_INT); + // Use a GT_AND to check for the lowest bit and indirect if it is set + GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); + GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); + + // slot = GT_IND(slot - 1) + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); + GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + indir->gtFlags |= GTF_IND_NONFAULTING; + indir->gtFlags |= GTF_IND_INVARIANT; + + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* asg = gtNewAssignNode(slot, indir); + GenTreeColon* colon = new (this, GT_COLON) 
GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); + GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); + impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); + + return gtNewLclvNode(slotLclNum, TYP_I_IMPL); } - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); + assert(pRuntimeLookup->indirections != 0); - unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); - impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1")); - GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - // downcast the pointer to a TYP_INT on 64-bit targets - slot = impImplicitIorI4Cast(slot, TYP_INT); - // Use a GT_AND to check for the lowest bit and indirect if it is set - GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); - GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); + // Extract the handle + GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; - // slot = GT_IND(slot - 1) - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); - GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); - indir->gtFlags |= GTF_IND_NONFAULTING; - indir->gtFlags |= GTF_IND_INVARIANT; + // Call the helper + // - Setup argNode with the pointer to the signature returned by the lookup + GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* asg = gtNewAssignNode(slot, indir); - GenTreeColon* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); - GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); - impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); + GenTreeCall* helperCall = 
gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - return gtNewLclvNode(slotLclNum, TYP_I_IMPL); + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + return helperCall; } struct RecursiveGuard From 54b224cdc0cba925d992bb36603cb79855722afc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 19:06:39 +0100 Subject: [PATCH 08/63] Revert "Enable JitDasmWithAlignmentBoundaries in Release" This reverts commit 22f34a1aedd63281dc610756906c3efc3bd192bc. --- src/coreclr/jit/compiler.cpp | 30 ++++++++++++++++-------------- src/coreclr/jit/compiler.h | 10 +++++----- src/coreclr/jit/emit.cpp | 11 ++++++++--- src/coreclr/jit/flowgraph.cpp | 6 ++++++ src/coreclr/jit/jitconfigvalues.h | 19 +++++++++++++------ 5 files changed, 48 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1130414704cd2c..20b2030c7d7f6b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2828,17 +2828,17 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compJitEarlyExpandMDArrays = (JitConfig.JitEarlyExpandMDArrays() != 0); - opts.disAsm = false; - opts.disDiffable = false; - opts.dspDiffable = false; - opts.disAlignment = false; - opts.disAddr = false; + opts.disAsm = false; + opts.disDiffable = false; + opts.dspDiffable = false; #ifdef DEBUG opts.dspInstrs = false; opts.dspLines = false; opts.varNames = false; opts.dmpHex = false; opts.disAsmSpilled = false; + opts.disAddr = false; + opts.disAlignment = false; opts.dspCode = false; opts.dspEHTable = false; opts.dspDebugInfo = false; @@ -2939,6 +2939,17 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.dspDiffable = true; } + // This one applies to both Ngen/Jit Disasm output: 
COMPlus_JitDasmWithAddress=1 + if (JitConfig.JitDasmWithAddress() != 0) + { + opts.disAddr = true; + } + + if (JitConfig.JitDasmWithAlignmentBoundaries() != 0) + { + opts.disAlignment = true; + } + if (JitConfig.JitLongAddress() != 0) { opts.compLongAddress = true; @@ -3032,15 +3043,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.disAsm = true; } #endif // !DEBUG - // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDasmWithAddress=1 - if (JitConfig.JitDasmWithAddress() != 0) - { - opts.disAddr = true; - } - if (JitConfig.JitDasmWithAlignmentBoundaries() != 0) - { - opts.disAlignment = true; - } //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5b7acf123980a5..4ad0ab6b48e848 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9452,11 +9452,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool optRepeat; // Repeat optimizer phases k times #endif - bool disAsm; // Display native code as it is generated - bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable) - bool disDiffable; // Makes the Disassembly code 'diff-able' - bool disAlignment; // Display alignment boundaries in disassembly code - bool disAddr; // Display process address next to each instruction in disassembly code + bool disAsm; // Display native code as it is generated + bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable) + bool disDiffable; // Makes the Disassembly code 'diff-able' #ifdef DEBUG bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH bool dspCode; // Display native code generated @@ -9468,6 +9466,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool varNames; // Display variables names in native code output bool disAsmSpilled; // Display 
native code when any register spilling occurs bool disasmWithGC; // Display GC info interleaved with disassembly. + bool disAddr; // Display process address next to each instruction in disassembly code + bool disAlignment; // Display alignment boundaries in disassembly code bool disAsm2; // Display native code after it is generated using external disassembler bool dspOrder; // Display names of each of the methods that we ngen/jit bool dspUnwind; // Display the unwind info output diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 752d8454e0f2fa..6b230993aa692f 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -1445,10 +1445,12 @@ void emitter::appendToCurIG(instrDesc* id) void emitter::emitDispInsAddr(BYTE* code) { +#ifdef DEBUG if (emitComp->opts.disAddr) { printf(FMT_ADDR, DBG_ADDR(code)); } +#endif } void emitter::emitDispInsOffs(unsigned offs, bool doffs) @@ -7074,9 +7076,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, for (unsigned cnt = ig->igInsCnt; cnt > 0; cnt--) { +#ifdef DEBUG size_t curInstrAddr = (size_t)cp; instrDesc* curInstrDesc = id; -#ifdef DEBUG if ((emitComp->opts.disAsm || emitComp->verbose) && (JitConfig.JitDisasmWithDebugInfo() != 0) && (id->idCodeSize() > 0)) @@ -7110,9 +7112,9 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, size_t insSize = emitIssue1Instr(ig, id, &cp); emitAdvanceInstrDesc(&id, insSize); +#ifdef DEBUG // Print the alignment boundary - if ((emitComp->opts.disAsm INDEBUG(|| emitComp->verbose)) && - (emitComp->opts.disAddr || emitComp->opts.disAlignment)) + if ((emitComp->opts.disAsm || emitComp->verbose) && (emitComp->opts.disAddr || emitComp->opts.disAlignment)) { size_t afterInstrAddr = (size_t)cp; instruction curIns = curInstrDesc->idIns(); @@ -7195,6 +7197,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, } } } +#endif // DEBUG } #ifdef DEBUG @@ -8114,10 +8117,12 @@ void emitter::emitDispDataSec(dataSecDsc* section, BYTE* dst) for (dataSection* data = 
section->dsdList; data != nullptr; data = data->dsNext) { +#ifdef DEBUG if (emitComp->opts.disAddr) { printf("; @" FMT_ADDR "\n", DBG_ADDR(dst)); } +#endif const char* labelFormat = "%-7s"; char label[64]; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 4bcc906f55a26b..7ebdd9dc6c250e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -78,6 +78,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } assert(tree->IsHelperCall()); + if (ISMETHOD("Test")) + { + gtDispTree(tree); + printf(""); + } + // TODO: expand runtime lookups into: // // isNull ? helperCall : IND(fastpath) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index fbfea56f0b32b8..4c44490bb35ed5 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -22,7 +22,9 @@ CONFIG_INTEGER(BreakOnDumpToken, W("BreakOnDumpToken"), 0xffffffff) // Breaks wh // particular token value. CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailure"), 0) // Halts the jit on // verification failure -CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able +CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able +CONFIG_INTEGER(JitDasmWithAddress, W("JitDasmWithAddress"), 0) // Print the process address next to each instruction of + // the disassembly CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics CONFIG_INTEGER(DisplayLsraStats, W("JitLsraStats"), 0) // Display JIT Linear Scan Register Allocator statistics // If set to "1", display the stats in textual format. @@ -78,6 +80,9 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, W("JitUnrollLoopMaxIterationCount"), DEFAULT_UNROLL_LOOP_MAX_ITERATION_COUNT) +// Print the alignment boundaries in disassembly. 
+CONFIG_INTEGER(JitDasmWithAlignmentBoundaries, W("JitDasmWithAlignmentBoundaries"), 0) + CONFIG_INTEGER(JitDirectAlloc, W("JitDirectAlloc"), 0) CONFIG_INTEGER(JitDoubleAlign, W("JitDoubleAlign"), 1) CONFIG_INTEGER(JitDumpASCII, W("JitDumpASCII"), 1) // Uses only ASCII characters in tree dumps @@ -185,6 +190,7 @@ CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak")) +CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method CONFIG_STRING(JitDisasmAssemblies, W("JitDisasmAssemblies")) // Only show JitDisasm and related info for methods // from this semicolon-delimited list of assemblies. CONFIG_INTEGER(JitDisasmWithGC, W("JitDisasmWithGC"), 0) // Dump interleaved GC Info for any method disassembled. @@ -249,13 +255,14 @@ CONFIG_STRING(JitStressRange, W("JitStressRange")) // Internal Jit /// JIT Hardware Intrinsics /// CONFIG_INTEGER(EnableIncompleteISAClass, W("EnableIncompleteISAClass"), 0) // Enable testing not-yet-implemented -#endif // defined(DEBUG) + // intrinsic classes + +#else // defined(DEBUG) +// JitDisasm is supported in Release too CONFIG_METHODSET(JitDisasm, W("JitDisasm")) -// Print the process address next to each instruction of the disassembly -CONFIG_INTEGER(JitDasmWithAddress, W("JitDasmWithAddress"), 0) -// Print the alignment boundaries in disassembly. -CONFIG_INTEGER(JitDasmWithAlignmentBoundaries, W("JitDasmWithAlignmentBoundaries"), 0) +#endif // !defined(DEBUG) + CONFIG_INTEGER(JitDisasmSummary, W("JitDisasmSummary"), 0) // Prints all jitted methods to the console CONFIG_STRING(JitStdOutFile, W("JitStdOutFile")) // If set, sends JIT's stdout output to this file. 
From e4d49447394876ecc3fbb41b4c26cb3012d91c85 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 19:49:21 +0100 Subject: [PATCH 09/63] add hashtable --- src/coreclr/jit/compiler.cpp | 1 + src/coreclr/jit/compiler.h | 11 +++++++++++ src/coreclr/jit/flowgraph.cpp | 27 ++++++++++++++++----------- src/coreclr/jit/importer.cpp | 4 ++++ 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 20b2030c7d7f6b..2b457f766fac42 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1950,6 +1950,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, vnStore = nullptr; m_outlinedCompositeSsaNums = nullptr; m_nodeToLoopMemoryBlockMap = nullptr; + m_signatureToLookupInfoMap = nullptr; fgSsaPassesCompleted = 0; fgSsaChecksEnabled = false; fgVNPassesCompleted = 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4ad0ab6b48e848..e0eaaefaba53a2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4900,6 +4900,17 @@ class Compiler return m_nodeToLoopMemoryBlockMap; } + typedef JitHashTable, CORINFO_LOOKUP*> SignatureToLookupInfoMap; + SignatureToLookupInfoMap* m_signatureToLookupInfoMap; + SignatureToLookupInfoMap* GetSignatureToLookupInfoMap() + { + if (m_signatureToLookupInfoMap == nullptr) + { + m_signatureToLookupInfoMap = new (getAllocator()) SignatureToLookupInfoMap(getAllocator()); + } + return m_signatureToLookupInfoMap; + } + void optRecordLoopMemoryDependence(GenTree* tree, BasicBlock* block, ValueNum memoryVN); void optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 7ebdd9dc6c250e..12013143788101 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -78,19 +78,24 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } assert(tree->IsHelperCall()); - if (ISMETHOD("Test")) + GenTreeCall* call = 
tree->AsCall(); + GenTree* ctx = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTreeIntCon* signature = call->gtArgs.GetArgByIndex(1)->GetNode()->AsIntCon(); + + CORINFO_LOOKUP* pLookup = nullptr; + bool found = GetSignatureToLookupInfoMap()->Lookup((void*)signature->IconValue(), &pLookup); + assert(found); + const CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; + assert(pRuntimeLookup->indirections != 0); + + if (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) { - gtDispTree(tree); - printf(""); + // dynamic expansion + } + else + { + // no dynamic expansion } - - // TODO: expand runtime lookups into: - // - // isNull ? helperCall : IND(fastpath) - // - // or (for dynamic expansion): - // - // isNull ? helperCall : (sizeCheck ? IND(fastPath) : helperCall) } } } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5395ed63a269c2..09626bbc710ce5 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1886,6 +1886,10 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 setMethodHasExpRuntimeLookup(); helperCall->SetExpRuntimeLookup(); + if (!GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, pLookup); + } return helperCall; } From 3e272e0d1870d841faac1156426b35c5d2dd55ee Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 25 Feb 2023 20:28:49 +0100 Subject: [PATCH 10/63] test --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/flowgraph.cpp | 42 ++++++++++++++++++++++++++--------- src/coreclr/jit/importer.cpp | 4 ++-- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index e0eaaefaba53a2..36c212c650f90b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4900,7 +4900,7 @@ class Compiler return 
m_nodeToLoopMemoryBlockMap; } - typedef JitHashTable, CORINFO_LOOKUP*> SignatureToLookupInfoMap; + typedef JitHashTable, CORINFO_RUNTIME_LOOKUP> SignatureToLookupInfoMap; SignatureToLookupInfoMap* m_signatureToLookupInfoMap; SignatureToLookupInfoMap* GetSignatureToLookupInfoMap() { diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 12013143788101..23ce2096541711 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -78,23 +78,45 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } assert(tree->IsHelperCall()); - GenTreeCall* call = tree->AsCall(); - GenTree* ctx = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTreeIntCon* signature = call->gtArgs.GetArgByIndex(1)->GetNode()->AsIntCon(); + GenTreeCall* call = tree->AsCall(); + assert(call->gtArgs.CountArgs() == 2); - CORINFO_LOOKUP* pLookup = nullptr; - bool found = GetSignatureToLookupInfoMap()->Lookup((void*)signature->IconValue(), &pLookup); - assert(found); - const CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; - assert(pRuntimeLookup->indirections != 0); + GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); - if (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) + void* signature = nullptr; + if (sigTree->IsCnsIntOrI()) + { + signature = (void*)sigTree->AsIntCon()->IconValue(); + } + else + { + if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) + { + signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); + } + else + { + assert(!"can't restore signature argument value"); + continue; + } + } + + assert(signature != nullptr); + + CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; + if (!GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup)) + { + continue; + } + + if (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK) { // dynamic expansion } else { - // no dynamic expansion + // normal expansion } } } diff --git 
a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 09626bbc710ce5..4666cbd68c810b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1878,8 +1878,8 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken // Call the helper // - Setup argNode with the pointer to the signature returned by the lookup + assert(pRuntimeLookup->signature != nullptr); GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. @@ -1888,7 +1888,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken helperCall->SetExpRuntimeLookup(); if (!GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) { - GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, pLookup); + GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); } return helperCall; } From 2671e14e18d94421aa0f2942b883d01005106622 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 00:41:30 +0100 Subject: [PATCH 11/63] Initial version (no dynamic expansion for now) --- src/coreclr/jit/flowgraph.cpp | 143 ++++++++++++++++++++++++++++++++-- src/coreclr/jit/gentree.h | 5 ++ 2 files changed, 143 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 23ce2096541711..b119d64d9d541f 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -66,11 +66,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() return result; } - for (BasicBlock* const block : Blocks()) + BasicBlock* prevBb = nullptr; + for (BasicBlock* block : Blocks()) { + TRAVERSE_BLOCK_AGAIN: + Statement* prevStmt = nullptr; for (Statement* const stmt : block->Statements()) { - for 
(GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) + for (GenTree* const tree : stmt->TreeList()) { if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) { @@ -79,6 +82,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(tree->IsHelperCall()); GenTreeCall* call = tree->AsCall(); + call->ClearExpRuntimeLookup(); assert(call->gtArgs.CountArgs() == 2); GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); @@ -112,14 +116,143 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK) { - // dynamic expansion + // TODO: impelement dynamic expansion + continue; } - else + + assert(runtimeLookup.indirections != 0); + assert(runtimeLookup.testForNull); + + unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); + lvaGetDesc(rtLookupLclNum)->lvType = TYP_I_IMPL; + GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + + if (prevBb == nullptr) + { + // We're going to emit a BB in front of fgFirstBB + fgEnsureFirstBBisScratch(); + prevBb = fgFirstBB; + } + + if (prevBb == block) + { + // Unlikely event: current block is a scratch block + continue; + } + + if (prevStmt == nullptr) { - // normal expansion + prevStmt = fgNewStmtFromTree(gtNewNothingNode()); + fgInsertStmtAtBeg(block, prevStmt); } + + if (!ctxTree->OperIs(GT_LCL_VAR)) + { + fgMakeMultiUse(&ctxTree); + } + + GenTree* slotPtrTree = ctxTree; + GenTree* indOffTree = nullptr; + for (WORD i = 0; i < runtimeLookup.indirections; i++) + { + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + indOffTree = fgMakeMultiUse(&slotPtrTree); + } + + // The last indirection could be subject to a size check (dynamic dictionary expansion) + bool isLastIndirectionWithSizeCheck = + ((i == runtimeLookup.indirections - 1) && (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); + + if (i != 0) + { + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, 
slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; + if (!isLastIndirectionWithSizeCheck) + { + slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + } + } + + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); + } + + if (runtimeLookup.offsets[i] != 0) + { + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, + gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); + } + } + + prevBb = block; + block = fgSplitBlockAfterStatement(prevBb, prevStmt); + + BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); + nullcheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); + + GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; + + GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); + nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED); + gtSetEvalOrder(nullcheckOp); + Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); + gtSetStmtInfo(nullcheckStmt); + fgSetStmtSeq(nullcheckStmt); + fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); + + BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); + fastPathBb->bbFlags |= (BBF_INTERNAL); + GenTree* asgTree = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(handleForNullCheck)); + Statement* asgStmt = fgNewStmtFromTree(asgTree); + fgInsertStmtAtBeg(fastPathBb, asgStmt); + gtSetStmtInfo(asgStmt); + fgSetStmtSeq(asgStmt); + + BasicBlock* fallbackBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); + fallbackBb->bbFlags |= (BBF_INTERNAL); + GenTree* asgTree2 = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call)); + Statement* asgStmt2 = fgNewStmtFromTree(asgTree2); + fgInsertStmtAtBeg(fallbackBb, asgStmt2); + gtSetStmtInfo(asgStmt2); + fgSetStmtSeq(asgStmt2); + + // Replace ExpRuntimeLookup call with a local + 
call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); + gtUpdateTreeAncestorsSideEffects(call); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + + // Connect all new blocks together + fgAddRefPred(nullcheckBb, prevBb); + fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(fastPathBb, nullcheckBb); + fgRemoveRefPred(block, prevBb); + fgAddRefPred(block, fastPathBb); + fgAddRefPred(block, fallbackBb); + nullcheckBb->bbJumpDest = fastPathBb; + fallbackBb->bbJumpDest = block; + fastPathBb->bbJumpDest = block; + + // Re-distribute weights + nullcheckBb->inheritWeight(prevBb); + fallbackBb->inheritWeightPercentage(nullcheckBb, 20); // TODO: Consider making it cold (0%) + fastPathBb->inheritWeightPercentage(nullcheckBb, 80); + block->inheritWeight(prevBb); + + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); + assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); + + // prevBb = helperCallBb; + result = PhaseStatus::MODIFIED_EVERYTHING; + goto TRAVERSE_BLOCK_AGAIN; } + prevStmt = stmt; } + prevBb = block; } return result; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 88203043b639c0..9161226118cd9c 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5530,6 +5530,11 @@ struct GenTreeCall final : public GenTree gtCallMoreFlags |= GTF_CALL_M_EXP_RUNTIME_LOOKUP; } + void ClearExpRuntimeLookup() + { + gtCallMoreFlags &= ~GTF_CALL_M_EXP_RUNTIME_LOOKUP; + } + bool IsExpRuntimeLookup() const { return (gtCallMoreFlags & GTF_CALL_M_EXP_RUNTIME_LOOKUP) != 0; From 64ae1497521c330dc97a10b6f1a580be20a35bae Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 00:47:43 +0100 Subject: [PATCH 12/63] Clean up --- src/coreclr/jit/flowgraph.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index b119d64d9d541f..239cde5578ce05 100644 --- 
a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -196,7 +196,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); - nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED); + nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; gtSetEvalOrder(nullcheckOp); Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); gtSetStmtInfo(nullcheckStmt); @@ -204,20 +204,20 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); - fastPathBb->bbFlags |= (BBF_INTERNAL); - GenTree* asgTree = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(handleForNullCheck)); - Statement* asgStmt = fgNewStmtFromTree(asgTree); - fgInsertStmtAtBeg(fastPathBb, asgStmt); - gtSetStmtInfo(asgStmt); - fgSetStmtSeq(asgStmt); + fastPathBb->bbFlags |= BBF_INTERNAL; + GenTree* asgFastPathValue = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(handleForNullCheck)); + Statement* asgFastPathValueStmt = fgNewStmtFromTree(asgFastPathValue); + fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); + gtSetStmtInfo(asgFastPathValueStmt); + fgSetStmtSeq(asgFastPathValueStmt); BasicBlock* fallbackBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); - fallbackBb->bbFlags |= (BBF_INTERNAL); - GenTree* asgTree2 = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call)); - Statement* asgStmt2 = fgNewStmtFromTree(asgTree2); - fgInsertStmtAtBeg(fallbackBb, asgStmt2); - gtSetStmtInfo(asgStmt2); - fgSetStmtSeq(asgStmt2); + fallbackBb->bbFlags |= BBF_INTERNAL; + GenTree* asgFallbackTree = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call)); + Statement* asgFallbackStmt = fgNewStmtFromTree(asgFallbackTree); + fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); + gtSetStmtInfo(asgFallbackStmt); + fgSetStmtSeq(asgFallbackStmt); 
// Replace ExpRuntimeLookup call with a local call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); @@ -246,7 +246,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); - // prevBb = helperCallBb; + prevBb = fastPathBb; result = PhaseStatus::MODIFIED_EVERYTHING; goto TRAVERSE_BLOCK_AGAIN; } From d9915332677f28999a402ddf132794a2d7b2fc27 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 01:58:23 +0100 Subject: [PATCH 13/63] Clean up --- src/coreclr/jit/flowgraph.cpp | 83 ++++++++++++++++++++++++++--------- src/coreclr/jit/utils.cpp | 7 ++- 2 files changed, 66 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 239cde5578ce05..7617ea70942aba 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -61,7 +61,9 @@ static bool blockNeedsGCPoll(BasicBlock* block) PhaseStatus Compiler::fgExpandRuntimeLookups() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - if (!doesMethodHaveExpRuntimeLookup()) + + // Current method doesn't have runtime lookups - bail out. + if (!doesMethodHaveExpRuntimeLookup() || !ISMETHOD("Test")) { return result; } @@ -75,6 +77,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { for (GenTree* const tree : stmt->TreeList()) { + // We only need calls with IsExpRuntimeLookup() flag if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) { continue; @@ -85,6 +88,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() call->ClearExpRuntimeLookup(); assert(call->gtArgs.CountArgs() == 2); + // call(ctx, signature); GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); @@ -95,12 +99,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } else { + // signature is not a constant (CSE'd?) 
- let's see if we can access it via VN if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) { signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); } else { + // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we + // want to catch such cases as we really don't want to emit just a fallback call - it's too slow assert(!"can't restore signature argument value"); continue; } @@ -114,18 +121,19 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } - if (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK) + const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; + if (needsSizeCheck) { - // TODO: impelement dynamic expansion + // TODO: implement dynamic expansion continue; } assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); - unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); - lvaGetDesc(rtLookupLclNum)->lvType = TYP_I_IMPL; - GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); + lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; + GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); if (prevBb == nullptr) { @@ -146,11 +154,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgInsertStmtAtBeg(block, prevStmt); } + // Save ctxTree to a local if it's complex if (!ctxTree->OperIs(GT_LCL_VAR)) { + // TODO: consider replacing fgMakeMultiUse here and below with statement inside nullcheckBb fgMakeMultiUse(&ctxTree); } + // Prepare slotPtr tree GenTree* slotPtrTree = ctxTree; GenTree* indOffTree = nullptr; for (WORD i = 0; i < runtimeLookup.indirections; i++) @@ -160,12 +171,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() indOffTree = fgMakeMultiUse(&slotPtrTree); } - // The last indirection could be subject to a size check (dynamic dictionary expansion) - bool isLastIndirectionWithSizeCheck = - 
((i == runtimeLookup.indirections - 1) && (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); - if (i != 0) { + // The last indirection could be subject to a size check (dynamic dictionary expansion) + bool isLastIndirectionWithSizeCheck = ((i == runtimeLookup.indirections - 1) && + (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; if (!isLastIndirectionWithSizeCheck) @@ -189,13 +200,34 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() prevBb = block; block = fgSplitBlockAfterStatement(prevBb, prevStmt); + // + // prevBb(BBJ_NONE): + // ... + // + // nullcheckBb(BBJ_COND): + // if (fastPathValue != 0) + // goto fastPathBb; + // + // fallbackBb(BBJ_ALWAYS): + // rtLookupLcl = HelperCall(); + // goto block; + // + // fastPathBb(BBJ_NONE): + // rtLookupLcl = fastPathValue; + // + // block(...): + // use(rtLookupLcl); + // + + // null-check basic block BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); nullcheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); - GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; + GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + fastPathValue->gtFlags |= GTF_IND_NONFAULTING; + GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); - GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; gtSetEvalOrder(nullcheckOp); Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); @@ -203,23 +235,31 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgSetStmtSeq(nullcheckStmt); fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); - BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, 
true); + BasicBlock* sizeCheckBb = nullptr; + if (needsSizeCheck) + { + // TODO: + } + + // Fast-path basic block + BasicBlock* fastPathBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); fastPathBb->bbFlags |= BBF_INTERNAL; - GenTree* asgFastPathValue = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(handleForNullCheck)); - Statement* asgFastPathValueStmt = fgNewStmtFromTree(asgFastPathValue); + Statement* asgFastPathValueStmt = + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); gtSetStmtInfo(asgFastPathValueStmt); fgSetStmtSeq(asgFastPathValueStmt); + // Fallback basic block BasicBlock* fallbackBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); fallbackBb->bbFlags |= BBF_INTERNAL; - GenTree* asgFallbackTree = gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call)); - Statement* asgFallbackStmt = fgNewStmtFromTree(asgFallbackTree); + Statement* asgFallbackStmt = + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call))); fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); gtSetStmtInfo(asgFallbackStmt); fgSetStmtSeq(asgFallbackStmt); - // Replace ExpRuntimeLookup call with a local + // Replace call with rtLookupLclNum local call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); gtUpdateTreeAncestorsSideEffects(call); gtSetStmtInfo(stmt); @@ -234,7 +274,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgAddRefPred(block, fallbackBb); nullcheckBb->bbJumpDest = fastPathBb; fallbackBb->bbJumpDest = block; - fastPathBb->bbJumpDest = block; // Re-distribute weights nullcheckBb->inheritWeight(prevBb); @@ -242,10 +281,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fastPathBb->inheritWeightPercentage(nullcheckBb, 80); block->inheritWeight(prevBb); + // All blocks are expected to be in the same EH region assert(BasicBlock::sameEHRegion(prevBb, block)); assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); assert(BasicBlock::sameEHRegion(prevBb, 
fastPathBb)); + // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. + // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible + // for that prevBb = fastPathBb; result = PhaseStatus::MODIFIED_EVERYTHING; goto TRAVERSE_BLOCK_AGAIN; diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 4c75d372b39ac4..5d177ba592be66 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1345,10 +1345,9 @@ void HelperCallProperties::init() case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: // logging helpers are not technically pure but can be optimized away - isPure = false; - noThrow = false; - nonNullReturn = false; - mutatesHeap = true; + isPure = true; + noThrow = true; + nonNullReturn = true; break; // type casting helpers From e20dd2d57ed9dd3e23caba70e7f61cc102e27636 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 09:22:51 +0100 Subject: [PATCH 14/63] Test --- src/coreclr/jit/flowgraph.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 7617ea70942aba..d0cf7b4ab6bbb6 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -63,7 +63,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; // Current method doesn't have runtime lookups - bail out. 
- if (!doesMethodHaveExpRuntimeLookup() || !ISMETHOD("Test")) + if (!doesMethodHaveExpRuntimeLookup()) { return result; } @@ -198,7 +198,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } prevBb = block; - block = fgSplitBlockAfterStatement(prevBb, prevStmt); + // TODO: use fgSplitBlockAfterStatement to be more precise + block = fgSplitBlockAtBeginning(prevBb); // // prevBb(BBJ_NONE): From 8fc03740042c6a1b6df07b1cd1f5d3b1d8a73494 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 10:50:58 +0100 Subject: [PATCH 15/63] Update BB flags properly --- src/coreclr/jit/flowgraph.cpp | 39 ++++++++++++----------------------- src/coreclr/jit/utils.cpp | 2 +- 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index d0cf7b4ab6bbb6..c8f93ff1f8ca81 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -68,7 +68,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() return result; } - BasicBlock* prevBb = nullptr; for (BasicBlock* block : Blocks()) { TRAVERSE_BLOCK_AGAIN: @@ -87,6 +86,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTreeCall* call = tree->AsCall(); call->ClearExpRuntimeLookup(); assert(call->gtArgs.CountArgs() == 2); + assert(!call->IsTailCall()); // We don't expect it here // call(ctx, signature); GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); @@ -135,25 +135,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - if (prevBb == nullptr) - { - // We're going to emit a BB in front of fgFirstBB - fgEnsureFirstBBisScratch(); - prevBb = fgFirstBB; - } - - if (prevBb == block) - { - // Unlikely event: current block is a scratch block - continue; - } - - if (prevStmt == nullptr) - { - prevStmt = fgNewStmtFromTree(gtNewNothingNode()); - fgInsertStmtAtBeg(block, prevStmt); - } - // Save ctxTree to a local if it's complex 
if (!ctxTree->OperIs(GT_LCL_VAR)) { @@ -197,9 +178,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } - prevBb = block; - // TODO: use fgSplitBlockAfterStatement to be more precise - block = fgSplitBlockAtBeginning(prevBb); + BasicBlockFlags originalFlags = block->bbFlags; + BasicBlock* prevBb = block; + block = prevStmt == nullptr ? fgSplitBlockAtBeginning(prevBb) + : fgSplitBlockAfterStatement(prevBb, prevStmt); + + prevBb->bbFlags = + originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | + BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); // // prevBb(BBJ_NONE): @@ -243,7 +230,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } // Fast-path basic block - BasicBlock* fastPathBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); + BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); fastPathBb->bbFlags |= BBF_INTERNAL; Statement* asgFastPathValueStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); @@ -274,6 +261,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); nullcheckBb->bbJumpDest = fastPathBb; + fastPathBb->bbJumpDest = block; fallbackBb->bbJumpDest = block; // Re-distribute weights @@ -290,14 +278,13 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. 
// We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible // for that - prevBb = fastPathBb; result = PhaseStatus::MODIFIED_EVERYTHING; goto TRAVERSE_BLOCK_AGAIN; } prevStmt = stmt; } - prevBb = block; } + return result; } diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 5d177ba592be66..f1ca33dc6c052c 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1345,7 +1345,7 @@ void HelperCallProperties::init() case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: // logging helpers are not technically pure but can be optimized away - isPure = true; + isPure = false; noThrow = true; nonNullReturn = true; break; From 269cb366b7b1ca5dde688a963d4fbe6eb6521d3c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 14:44:29 +0100 Subject: [PATCH 16/63] Add dynamic expansion path --- src/coreclr/jit/flowgraph.cpp | 168 ++++++++++++++++++++++++++-------- 1 file changed, 132 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index c8f93ff1f8ca81..42a8396539b95f 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -142,9 +142,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgMakeMultiUse(&ctxTree); } - // Prepare slotPtr tree - GenTree* slotPtrTree = ctxTree; - GenTree* indOffTree = nullptr; + // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) + GenTree* slotPtrTree = ctxTree; + GenTree* indOffTree = nullptr; + GenTree* lastIndOfTree = nullptr; for (WORD i = 0; i < runtimeLookup.indirections; i++) { if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) @@ -152,11 +153,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() indOffTree = fgMakeMultiUse(&slotPtrTree); } + // The last indirection could be subject to a size check (dynamic dictionary expansion) + bool 
isLastIndirectionWithSizeCheck = ((i == runtimeLookup.indirections - 1) && + (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); + if (i != 0) { - // The last indirection could be subject to a size check (dynamic dictionary expansion) - bool isLastIndirectionWithSizeCheck = ((i == runtimeLookup.indirections - 1) && - (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; @@ -173,6 +175,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (runtimeLookup.offsets[i] != 0) { + if (isLastIndirectionWithSizeCheck) + { + lastIndOfTree = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, + nullptr DEBUGARG("impRuntimeLookup indirectOffset")); + } + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); } @@ -188,22 +196,23 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + // Non-dynamic expansion case (no size check): // - // prevBb(BBJ_NONE): + // prevBb(BBJ_NONE): [weight: 1.0] // ... 
// - // nullcheckBb(BBJ_COND): - // if (fastPathValue != 0) - // goto fastPathBb; + // nullcheckBb(BBJ_COND): [weight: 1.0] + // if (fastPathValue == 0) + // goto fallbackBb; // - // fallbackBb(BBJ_ALWAYS): - // rtLookupLcl = HelperCall(); + // fastPathBb(BBJ_ALWAYS): [weight: 0.8] + // rtLookupLcl = fastPathValue; // goto block; // - // fastPathBb(BBJ_NONE): - // rtLookupLcl = fastPathValue; + // fallbackBb(BBJ_NONE): [weight: 0.2] + // rtLookupLcl = HelperCall(); // - // block(...): + // block(...): [weight: 1.0] // use(rtLookupLcl); // @@ -215,7 +224,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fastPathValue->gtFlags |= GTF_IND_NONFAULTING; GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); - GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; gtSetEvalOrder(nullcheckOp); Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); @@ -223,11 +232,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgSetStmtSeq(nullcheckStmt); fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); - BasicBlock* sizeCheckBb = nullptr; - if (needsSizeCheck) - { - // TODO: - } + // Fallback basic block + BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); + fallbackBb->bbFlags |= BBF_INTERNAL; + Statement* asgFallbackStmt = + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call))); + fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); + gtSetStmtInfo(asgFallbackStmt); + fgSetStmtSeq(asgFallbackStmt); // Fast-path basic block BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); @@ -238,14 +250,53 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() gtSetStmtInfo(asgFastPathValueStmt); fgSetStmtSeq(asgFastPathValueStmt); - // Fallback basic block - BasicBlock* fallbackBb = fgNewBBafter(BBJ_ALWAYS, 
nullcheckBb, true); - fallbackBb->bbFlags |= BBF_INTERNAL; - Statement* asgFallbackStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call))); - fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); - gtSetStmtInfo(asgFallbackStmt); - fgSetStmtSeq(asgFallbackStmt); + BasicBlock* sizeCheckBb = nullptr; + if (needsSizeCheck) + { + // Dynamic expansion case (sizeCheckBb is added and some preds are changed): + // + // prevBb(BBJ_NONE): [weight: 1.0] + // ... + // + // nullcheckBb(BBJ_COND): [weight: 1.0] + // if (fastPathValue == 0) + // goto fallbackBb; + // + // sizeCheckBb(BBJ_COND): [weight: 0.8] + // if (fastPathValue == 0) + // goto fallbackBb; + // + // fastPathBb(BBJ_ALWAYS): [weight: 0.64] + // rtLookupLcl = fastPathValue; + // goto block; + // + // fallbackBb(BBJ_NONE): [weight: 0.36] + // rtLookupLcl = HelperCall(); + // + // block(...): [weight: 1.0] + // use(rtLookupLcl); + // + + sizeCheckBb = fgNewBBafter(BBJ_COND, nullcheckBb, true); + sizeCheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); + + // sizeValue = dictionary[pRuntimeLookup->sizeOffset] + GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); + assert(lastIndOfTree != nullptr); + GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); + GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); + sizeValue->gtFlags |= GTF_IND_NONFAULTING; + + // sizeCheck fails if sizeValue < pRuntimeLookup->offsets[i] + GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); + GenTree* sizeCheck = gtNewOperNode(GT_LT, TYP_INT, sizeValue, offsetValue); + sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; + gtSetEvalOrder(sizeCheck); + Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); + gtSetStmtInfo(sizeCheckStmt); + fgSetStmtSeq(sizeCheckStmt); + fgInsertStmtAtEnd(sizeCheckBb, sizeCheckStmt); + } // Replace call with rtLookupLclNum local 
call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); @@ -255,25 +306,70 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Connect all new blocks together fgAddRefPred(nullcheckBb, prevBb); - fgAddRefPred(fallbackBb, nullcheckBb); - fgAddRefPred(fastPathBb, nullcheckBb); fgRemoveRefPred(block, prevBb); fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); - nullcheckBb->bbJumpDest = fastPathBb; + nullcheckBb->bbJumpDest = fallbackBb; fastPathBb->bbJumpDest = block; - fallbackBb->bbJumpDest = block; - // Re-distribute weights + if (needsSizeCheck) + { + // nullcheckBb flows into sizeCheckBb in case of non-null + fgAddRefPred(sizeCheckBb, nullcheckBb); + + // fallbackBb is reachable from either nullcheck or sizecheck + fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(fallbackBb, sizeCheckBb); + + // fastPathBb is only reachable from successful sizeCheckBb + fgAddRefPred(fastPathBb, sizeCheckBb); + + // sizeCheckBb fails - jump to fallbackBb + sizeCheckBb->bbJumpDest = fallbackBb; + } + else + { + // No size check, nullcheckBb jumps to fast path + fgAddRefPred(fastPathBb, nullcheckBb); + + // fallbackBb is only reachable from nullcheckBb (jump destination) + fgAddRefPred(fallbackBb, nullcheckBb); + } + + // Re-distribute weights (see '[weight: X]' on the diagrams above) + + // First, nullcheck and the last block are expected to just inherit prevBb weight nullcheckBb->inheritWeight(prevBb); - fallbackBb->inheritWeightPercentage(nullcheckBb, 20); // TODO: Consider making it cold (0%) - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); block->inheritWeight(prevBb); + if (needsSizeCheck) + { + // 80% chance we pass nullcheck + sizeCheckBb->inheritWeightPercentage(nullcheckBb, 80); + + // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck + fastPathBb->inheritWeightPercentage(sizeCheckBb, 80); + + // 100-64=36% chance we fail either nullcheck or sizecheck + fallbackBb->inheritWeightPercentage(nullcheckBb, 36); + } + else 
+ { + // 80% chance we pass nullcheck + fastPathBb->inheritWeightPercentage(nullcheckBb, 80); + + // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) + fallbackBb->inheritWeightPercentage(nullcheckBb, 20); + } + // All blocks are expected to be in the same EH region assert(BasicBlock::sameEHRegion(prevBb, block)); assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); + if (needsSizeCheck) + { + assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb)); + } // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible From 3c1cb1c8316c987561240ca64b6732c64c437c53 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 20:40:07 +0100 Subject: [PATCH 17/63] Add dynamic expansion --- src/coreclr/jit/flowgraph.cpp | 116 ++++++++++++++++++++-------------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 42a8396539b95f..037ea0cfec8d47 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -122,44 +122,57 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; - if (needsSizeCheck) - { - // TODO: implement dynamic expansion - continue; - } assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); + BasicBlockFlags originalFlags = block->bbFlags; + BasicBlock* prevBb = block; + + if (prevStmt == nullptr) + { + block = fgSplitBlockAtBeginning(prevBb); + } + else + { + block = fgSplitBlockAfterStatement(prevBb, prevStmt); + } + const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - // Save ctxTree to a local if it's complex 
- if (!ctxTree->OperIs(GT_LCL_VAR)) - { - // TODO: consider replacing fgMakeMultiUse here and below with statement inside nullcheckBb - fgMakeMultiUse(&ctxTree); - } + // Save expression to a local and append as the last statement in prevBb + auto spillExpr = [&](GenTree* expr) -> GenTree* { + if (expr->OperIs(GT_LCL_VAR)) + { + return gtClone(expr); + } + unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); + lvaTable[tmpNum].lvType = expr->TypeGet(); + Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); + gtSetStmtInfo(asgStmt); + fgSetStmtSeq(asgStmt); + return gtNewLclvNode(tmpNum, expr->TypeGet()); + }; // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) - GenTree* slotPtrTree = ctxTree; + ctxTree = spillExpr(ctxTree); + GenTree* slotPtrTree = gtClone(ctxTree); GenTree* indOffTree = nullptr; GenTree* lastIndOfTree = nullptr; for (WORD i = 0; i < runtimeLookup.indirections; i++) { if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) { - indOffTree = fgMakeMultiUse(&slotPtrTree); + indOffTree = spillExpr(slotPtrTree); } // The last indirection could be subject to a size check (dynamic dictionary expansion) - bool isLastIndirectionWithSizeCheck = ((i == runtimeLookup.indirections - 1) && - (runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK)); + const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; if (i != 0) { - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; if (!isLastIndirectionWithSizeCheck) @@ -177,8 +190,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { if (isLastIndirectionWithSizeCheck) { - lastIndOfTree = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, - nullptr DEBUGARG("impRuntimeLookup indirectOffset")); + lastIndOfTree = spillExpr(slotPtrTree); } slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, 
slotPtrTree, @@ -186,11 +198,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } - BasicBlockFlags originalFlags = block->bbFlags; - BasicBlock* prevBb = block; - block = prevStmt == nullptr ? fgSplitBlockAtBeginning(prevBb) - : fgSplitBlockAfterStatement(prevBb, prevStmt); - prevBb->bbFlags = originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | @@ -220,7 +227,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); nullcheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); - GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); @@ -235,11 +242,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Fallback basic block BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); fallbackBb->bbFlags |= BBF_INTERNAL; - Statement* asgFallbackStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(call))); + + GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); + assert(ctxTree->OperIs(GT_LCL_VAR)); + fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); + Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); gtSetStmtInfo(asgFallbackStmt); fgSetStmtSeq(asgFallbackStmt); + gtUpdateTreeAncestorsSideEffects(fallbackCall); // Fast-path basic block BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); @@ -256,18 +267,18 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Dynamic expansion case (sizeCheckBb is added and some preds are changed): // // prevBb(BBJ_NONE): [weight: 1.0] - // ... 
// - // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (fastPathValue == 0) + // sizeCheckBb(BBJ_COND): [weight: 1.0] + // if (sizeValue <= offsetValue) // goto fallbackBb; + // ... // - // sizeCheckBb(BBJ_COND): [weight: 0.8] - // if (fastPathValue == 0) + // nullcheckBb(BBJ_COND): [weight: 0.8] + // if (*fastPathValue == null) // goto fallbackBb; // // fastPathBb(BBJ_ALWAYS): [weight: 0.64] - // rtLookupLcl = fastPathValue; + // rtLookupLcl = *fastPathValue; // goto block; // // fallbackBb(BBJ_NONE): [weight: 0.36] @@ -277,19 +288,20 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // use(rtLookupLcl); // - sizeCheckBb = fgNewBBafter(BBJ_COND, nullcheckBb, true); + sizeCheckBb = fgNewBBbefore(BBJ_COND, nullcheckBb, true); sizeCheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); // sizeValue = dictionary[pRuntimeLookup->sizeOffset] GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); assert(lastIndOfTree != nullptr); GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); - GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); + GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); sizeValue->gtFlags |= GTF_IND_NONFAULTING; - // sizeCheck fails if sizeValue < pRuntimeLookup->offsets[i] - GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LT, TYP_INT, sizeValue, offsetValue); + // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] + GenTree* offsetValue = + gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); + GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); @@ -305,7 +317,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgSetStmtSeq(stmt); // Connect all 
new blocks together - fgAddRefPred(nullcheckBb, prevBb); fgRemoveRefPred(block, prevBb); fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); @@ -314,21 +325,27 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (needsSizeCheck) { - // nullcheckBb flows into sizeCheckBb in case of non-null - fgAddRefPred(sizeCheckBb, nullcheckBb); + // Size check is the first block after prevBb + fgAddRefPred(sizeCheckBb, prevBb); + + // sizeCheckBb flows into nullcheckBb in case if the size check passes + fgAddRefPred(nullcheckBb, sizeCheckBb); - // fallbackBb is reachable from either nullcheck or sizecheck + // fallbackBb is reachable from both nullcheckBb and sizeCheckBb fgAddRefPred(fallbackBb, nullcheckBb); fgAddRefPred(fallbackBb, sizeCheckBb); - // fastPathBb is only reachable from successful sizeCheckBb - fgAddRefPred(fastPathBb, sizeCheckBb); + // fastPathBb is only reachable from successful nullcheckBb + fgAddRefPred(fastPathBb, nullcheckBb); // sizeCheckBb fails - jump to fallbackBb sizeCheckBb->bbJumpDest = fallbackBb; } else { + // nullcheckBb is the first block after prevBb + fgAddRefPred(nullcheckBb, prevBb); + // No size check, nullcheckBb jumps to fast path fgAddRefPred(fastPathBb, nullcheckBb); @@ -338,23 +355,25 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Re-distribute weights (see '[weight: X]' on the diagrams above) - // First, nullcheck and the last block are expected to just inherit prevBb weight - nullcheckBb->inheritWeight(prevBb); block->inheritWeight(prevBb); if (needsSizeCheck) { + sizeCheckBb->inheritWeight(prevBb); + // 80% chance we pass nullcheck - sizeCheckBb->inheritWeightPercentage(nullcheckBb, 80); + nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck - fastPathBb->inheritWeightPercentage(sizeCheckBb, 80); + fastPathBb->inheritWeightPercentage(nullcheckBb, 80); // 100-64=36% chance we fail either nullcheck or sizecheck - 
fallbackBb->inheritWeightPercentage(nullcheckBb, 36); + fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); } else { + nullcheckBb->inheritWeight(prevBb); + // 80% chance we pass nullcheck fastPathBb->inheritWeightPercentage(nullcheckBb, 80); @@ -375,6 +394,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible // for that result = PhaseStatus::MODIFIED_EVERYTHING; + block = prevBb; goto TRAVERSE_BLOCK_AGAIN; } prevStmt = stmt; From 47524ed55e84a6b67d18fd6b2961d324851fe7fc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 26 Feb 2023 22:08:52 +0100 Subject: [PATCH 18/63] Add comments --- src/coreclr/jit/flowgraph.cpp | 41 +++++++++++++++++++++++++++-------- src/coreclr/jit/utils.cpp | 2 +- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 037ea0cfec8d47..6182da2775a9da 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -62,15 +62,17 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - // Current method doesn't have runtime lookups - bail out. 
if (!doesMethodHaveExpRuntimeLookup()) { + JITDUMP("Current method doesn't have runtime lookups - bail out.") return result; } + // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { TRAVERSE_BLOCK_AGAIN: + Statement* prevStmt = nullptr; for (Statement* const stmt : block->Statements()) { @@ -83,6 +85,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } assert(tree->IsHelperCall()); + JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(tree), block->bbNum) + DISPTREE(tree) + JITDUMP("\n") + GenTreeCall* call = tree->AsCall(); call->ClearExpRuntimeLookup(); assert(call->gtArgs.CountArgs() == 2); @@ -112,7 +118,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } } - assert(signature != nullptr); CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; @@ -122,6 +127,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; + if (needsSizeCheck) + { + JITDUMP("dynamic expansion, needs size check.\n") + } assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); @@ -131,13 +140,22 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (prevStmt == nullptr) { + JITDUMP("Splitting " FMT_BB " at the beginning.\n", prevBb->bbNum) block = fgSplitBlockAtBeginning(prevBb); } else { + JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, prevStmt->GetID()) block = fgSplitBlockAfterStatement(prevBb, prevStmt); } + // We split a block, possibly, in the middle - we need to propagate some flags + prevBb->bbFlags = + originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | + BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + + // Define a local for the result const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; 
GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); @@ -198,22 +216,17 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } - prevBb->bbFlags = - originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); - block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | - BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); - // Non-dynamic expansion case (no size check): // // prevBb(BBJ_NONE): [weight: 1.0] // ... // // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (fastPathValue == 0) + // if (*fastPathValue == null) // goto fallbackBb; // // fastPathBb(BBJ_ALWAYS): [weight: 0.8] - // rtLookupLcl = fastPathValue; + // rtLookupLcl = *fastPathValue; // goto block; // // fallbackBb(BBJ_NONE): [weight: 0.2] @@ -229,6 +242,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; + + // Save dictionary slot to a local (to be used by fast path) GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); @@ -401,6 +416,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } +#ifdef DEBUG + if (verbose && (result == PhaseStatus::MODIFIED_EVERYTHING)) + { + printf("\n*************** After fgExpandRuntimeLookups()\n"); + fgDispBasicBlocks(true); + } +#endif + return result; } diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index f1ca33dc6c052c..5d177ba592be66 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1345,7 +1345,7 @@ void HelperCallProperties::init() case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: // logging helpers are not technically pure but can be optimized away - isPure = false; + isPure = true; noThrow = true; nonNullReturn = true; break; From 
ba8fdc216d93cf577fb55a7b139ffbd67fb88dd8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 Feb 2023 11:48:57 +0100 Subject: [PATCH 19/63] test --- src/coreclr/jit/flowgraph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 6182da2775a9da..86686ced92b90c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -238,7 +238,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // null-check basic block BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); - nullcheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); + nullcheckBb->bbFlags |= BBF_INTERNAL; GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; @@ -247,7 +247,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); - nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; + nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(nullcheckOp); Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); gtSetStmtInfo(nullcheckStmt); @@ -304,7 +304,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // sizeCheckBb = fgNewBBbefore(BBJ_COND, nullcheckBb, true); - sizeCheckBb->bbFlags |= (BBF_INTERNAL | BBF_HAS_JMP); + sizeCheckBb->bbFlags |= BBF_INTERNAL; // sizeValue = dictionary[pRuntimeLookup->sizeOffset] GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); From 5d6cf4aba1531797257d9f4a656f65b3e133f52c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 Feb 2023 14:19:29 +0100 Subject: [PATCH 20/63] test #2 --- src/coreclr/jit/flowgraph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 
86686ced92b90c..35c5a9dd925387 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -313,11 +313,11 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); sizeValue->gtFlags |= GTF_IND_NONFAULTING; - // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] + // sizeCheck fails if sizeValue < pRuntimeLookup->offsets[i] GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); - sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; + GenTree* sizeCheck = gtNewOperNode(GT_LT, TYP_INT, sizeValue, offsetValue); + sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); gtSetStmtInfo(sizeCheckStmt); From ec00f5b1e6280489d6b93a0bfd6ee2756f93fdcc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 Feb 2023 16:42:39 +0100 Subject: [PATCH 21/63] it should be <= --- src/coreclr/jit/flowgraph.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 35c5a9dd925387..68433d03b6b7ea 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -313,10 +313,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); sizeValue->gtFlags |= GTF_IND_NONFAULTING; - // sizeCheck fails if sizeValue < pRuntimeLookup->offsets[i] + // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LT, TYP_INT, sizeValue, offsetValue); + GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | 
GTF_DONT_CSE); gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); From ec893da976d57d1bd1e982adea333db5b7e25fc7 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 28 Feb 2023 20:14:45 +0100 Subject: [PATCH 22/63] Update flowgraph.cpp --- src/coreclr/jit/flowgraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 68433d03b6b7ea..15c4ed6fa48f0c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -316,7 +316,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); + GenTree* sizeCheck = gtNewOperNode(GT_GT, TYP_INT, sizeValue, offsetValue); sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); From f81882f427401b0c351eb5998ce19eed3b6f4d45 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 Feb 2023 23:44:30 +0100 Subject: [PATCH 23/63] test 3 --- src/coreclr/jit/flowgraph.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 15c4ed6fa48f0c..121b4c54374cc9 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -184,6 +184,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) { indOffTree = spillExpr(slotPtrTree); + slotPtrTree = gtClone(indOffTree); } // The last indirection could be subject to a size check (dynamic dictionary expansion) @@ -209,6 +210,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if 
(isLastIndirectionWithSizeCheck) { lastIndOfTree = spillExpr(slotPtrTree); + slotPtrTree = gtClone(lastIndOfTree); } slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, @@ -244,7 +246,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fastPathValue->gtFlags |= GTF_IND_NONFAULTING; // Save dictionary slot to a local (to be used by fast path) - GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); + //GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); @@ -271,7 +273,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); fastPathBb->bbFlags |= BBF_INTERNAL; Statement* asgFastPathValueStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(fastPathValue))); fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); gtSetStmtInfo(asgFastPathValueStmt); fgSetStmtSeq(asgFastPathValueStmt); @@ -316,7 +318,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] GenTree* offsetValue = gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_GT, TYP_INT, sizeValue, offsetValue); + GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); From d1d0bc875d4c098f44984ac87d66c8b5bb32b9fe Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 1 Mar 2023 23:10:09 +0100 Subject: [PATCH 24/63] test 4 --- src/coreclr/jit/flowgraph.cpp | 31 +++++++++++++++++++++++++------ src/coreclr/jit/importer.cpp | 4 ---- 2 files changed, 25 
insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 121b4c54374cc9..6f01f8d1ee8623 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -132,6 +132,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() JITDUMP("dynamic expansion, needs size check.\n") } + if (block->bbNatLoopNum == BasicBlock::NOT_IN_LOOP) + { + // Test + continue; + } + + auto debugInfo = nextStmt->GetDebugInfo(); + assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); @@ -169,6 +177,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); lvaTable[tmpNum].lvType = expr->TypeGet(); Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); + asgStmt->SetDebugInfo(debugInfo); gtSetStmtInfo(asgStmt); fgSetStmtSeq(asgStmt); return gtNewLclvNode(tmpNum, expr->TypeGet()); @@ -252,6 +261,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(nullcheckOp); Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); + nullcheckStmt->SetDebugInfo(debugInfo); gtSetStmtInfo(nullcheckStmt); fgSetStmtSeq(nullcheckStmt); fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); @@ -262,8 +272,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); assert(ctxTree->OperIs(GT_LCL_VAR)); - fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); + //fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); + asgFallbackStmt->SetDebugInfo(debugInfo); fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); gtSetStmtInfo(asgFallbackStmt); fgSetStmtSeq(asgFallbackStmt); @@ -274,6 +285,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fastPathBb->bbFlags |= 
BBF_INTERNAL; Statement* asgFastPathValueStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(fastPathValue))); + asgFastPathValueStmt->SetDebugInfo(debugInfo); fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); gtSetStmtInfo(asgFastPathValueStmt); fgSetStmtSeq(asgFastPathValueStmt); @@ -322,6 +334,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); gtSetEvalOrder(sizeCheck); Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); + sizeCheckStmt->SetDebugInfo(debugInfo); gtSetStmtInfo(sizeCheckStmt); fgSetStmtSeq(sizeCheckStmt); fgInsertStmtAtEnd(sizeCheckBb, sizeCheckStmt); @@ -418,13 +431,19 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } -#ifdef DEBUG - if (verbose && (result == PhaseStatus::MODIFIED_EVERYTHING)) + if (result == PhaseStatus::MODIFIED_EVERYTHING) { - printf("\n*************** After fgExpandRuntimeLookups()\n"); - fgDispBasicBlocks(true); - } + fgReorderBlocks(/* useProfileData */ false); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** After fgExpandRuntimeLookups()\n"); + fgDispBasicBlocks(true); + } #endif + } return result; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4666cbd68c810b..82ea8b9867ae4e 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1872,10 +1872,6 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1")); - // Extract the handle - GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; - // Call the helper // - Setup argNode with the pointer to the signature returned by the lookup assert(pRuntimeLookup->signature != nullptr); From eea468a88172c5f56dbd27c69597c2ae747253a1 Mon Sep 17 00:00:00 
2001 From: EgorBo Date: Wed, 1 Mar 2023 23:26:56 +0100 Subject: [PATCH 25/63] test 4 --- src/coreclr/jit/flowgraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 6f01f8d1ee8623..63f32a193acf6e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -138,7 +138,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } - auto debugInfo = nextStmt->GetDebugInfo(); + auto debugInfo = stmt->GetDebugInfo(); assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); From 9af10798e5b958934d491cf81e7afc951377298f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 2 Mar 2023 16:52:03 +0100 Subject: [PATCH 26/63] Test 5 --- src/coreclr/jit/flowgraph.cpp | 34 ++++++++++++++++------------------ src/coreclr/jit/importer.cpp | 6 ++++-- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 63f32a193acf6e..dbcb5410153e61 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -92,7 +92,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTreeCall* call = tree->AsCall(); call->ClearExpRuntimeLookup(); assert(call->gtArgs.CountArgs() == 2); - assert(!call->IsTailCall()); // We don't expect it here + + if (call->IsTailCall()) + { + assert(!"Unexpected runtime lookup as a tail call"); + continue; + } // call(ctx, signature); GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); @@ -121,10 +126,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(signature != nullptr); CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; - if (!GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup)) - { - continue; - } + const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); + assert(lookupFound); const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; if (needsSizeCheck) @@ -132,13 +135,7 @@ 
PhaseStatus Compiler::fgExpandRuntimeLookups() JITDUMP("dynamic expansion, needs size check.\n") } - if (block->bbNatLoopNum == BasicBlock::NOT_IN_LOOP) - { - // Test - continue; - } - - auto debugInfo = stmt->GetDebugInfo(); + DebugInfo debugInfo = stmt->GetDebugInfo(); assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); @@ -192,7 +189,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) { - indOffTree = spillExpr(slotPtrTree); + indOffTree = spillExpr(slotPtrTree); slotPtrTree = gtClone(indOffTree); } @@ -219,7 +216,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (isLastIndirectionWithSizeCheck) { lastIndOfTree = spillExpr(slotPtrTree); - slotPtrTree = gtClone(lastIndOfTree); + slotPtrTree = gtClone(lastIndOfTree); } slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, @@ -255,7 +252,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fastPathValue->gtFlags |= GTF_IND_NONFAULTING; // Save dictionary slot to a local (to be used by fast path) - //GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); @@ -272,7 +268,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); assert(ctxTree->OperIs(GT_LCL_VAR)); - //fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); asgFallbackStmt->SetDebugInfo(debugInfo); fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); @@ -433,8 +428,11 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (result == PhaseStatus::MODIFIED_EVERYTHING) { - fgReorderBlocks(/* useProfileData */ false); - fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + if 
(opts.OptimizationEnabled()) + { + fgReorderBlocks(/* useProfileData */ false); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + } #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f1c590bb10aa9d..b598610c178603 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1891,9 +1891,11 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 setMethodHasExpRuntimeLookup(); helperCall->SetExpRuntimeLookup(); - if (!GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) { - GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); } return helperCall; } From ea3bf76fba284840c82405dab6d74f950fdc1bd5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 2 Mar 2023 22:27:31 +0100 Subject: [PATCH 27/63] Clean up --- src/coreclr/jit/flowgraph.cpp | 35 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/gentree.cpp | 19 +++++++++++++++++++ src/coreclr/jit/lower.cpp | 3 +++ 3 files changed, 57 insertions(+) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index dbcb5410153e61..7b4d1a77c7d0a9 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -378,6 +378,23 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgAddRefPred(fallbackBb, nullcheckBb); } + // Some quick validation + assert(prevBb->NumSucc() == 1); + if (needsSizeCheck) + { + assert(prevBb->GetSucc(0) == sizeCheckBb); + assert(sizeCheckBb->NumSucc() == 2); + } + else + { + assert(prevBb->GetSucc(0) == nullcheckBb); + } + assert(nullcheckBb->NumSucc() 
== 2); + assert(fastPathBb->NumSucc() == 1); + assert(fallbackBb->NumSucc() == 1); + assert(fastPathBb->GetSucc(0) == block); + assert(fallbackBb->GetSucc(0) == block); + // Re-distribute weights (see '[weight: X]' on the diagrams above) block->inheritWeight(prevBb); @@ -406,6 +423,24 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fallbackBb->inheritWeightPercentage(nullcheckBb, 20); } + // Update loop info + if (prevBb->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) + { + nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; + if (needsSizeCheck) + { + sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + } + + // Update lpBottom after block split + if (optLoopTable[prevBb->bbNatLoopNum].lpBottom == prevBb) + { + optLoopTable[prevBb->bbNatLoopNum].lpBottom = block; + } + } + // All blocks are expected to be in the same EH region assert(BasicBlock::sameEHRegion(prevBb, block)); assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index bf8a6ef3b92689..3330ebfcfcfb03 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -5780,6 +5780,25 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) GenTreeCall* call; call = tree->AsCall(); + if (call->IsHelperCall()) + { + switch (eeGetHelperNum(call->gtCallMethHnd)) + { + case CORINFO_HELP_RUNTIMEHANDLE_METHOD: + case CORINFO_HELP_RUNTIMEHANDLE_CLASS: + case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG: + case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: + case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: + // These helpers a hoistable and are extremely slow. 
Also, they introduce a control flow in a + // late phase + costEx *= 2; + costSz *= 3; + break; + default: + break; + } + } + // Evaluate the arguments lvl2 = gtSetCallArgsOrder(&call->gtArgs, /* lateArgs */ false, &costEx, &costSz); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 81247ca991f77d..29929fc319dc8a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1790,6 +1790,9 @@ void Lowering::LowerCall(GenTree* node) DISPTREERANGE(BlockRange(), call); JITDUMP("\n"); + // All runtime lookups are expected to be expanded in fgExpandRuntimeLookups + assert(!call->IsExpRuntimeLookup()); + call->ClearOtherRegs(); LowerArgsForCall(call); From 8a2a8ad4bbdb7b29b8158a8ade85f6450c6dfc2f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 2 Mar 2023 23:27:31 +0100 Subject: [PATCH 28/63] Found it! --- src/coreclr/jit/flowgraph.cpp | 6 ++++++ src/coreclr/jit/importer.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 7b4d1a77c7d0a9..9f74e24a2702df 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -180,6 +180,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() return gtNewLclvNode(tmpNum, expr->TypeGet()); }; + // if sigTree was not a constant e.g. COMMA(..., CNS)) - spill it + if (!sigTree->IsCnsIntOrI()) + { + spillExpr(sigTree); + } + // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) ctxTree = spillExpr(ctxTree); GenTree* slotPtrTree = gtClone(ctxTree); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b598610c178603..92227dd5ed430a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1889,7 +1889,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
// We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - setMethodHasExpRuntimeLookup(); + impInlineRoot()->setMethodHasExpRuntimeLookup(); helperCall->SetExpRuntimeLookup(); if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) From 777def65963e28367ee24ae69f240cda1ec5b108 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 3 Mar 2023 08:39:41 +0100 Subject: [PATCH 29/63] fix assert --- src/coreclr/jit/compiler.h | 5 ----- src/coreclr/jit/flowgraph.cpp | 18 +++++++++++++++++- src/coreclr/jit/gentree.cpp | 18 ------------------ src/coreclr/jit/indirectcalltransformer.cpp | 1 - 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 27515ad895b946..381e2ace3f69c8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7039,11 +7039,6 @@ class Compiler optMethodFlags |= OMF_HAS_EXPRUNTIMELOOKUP; } - void clearMethodHasExpRuntimeLookup() - { - optMethodFlags &= ~OMF_HAS_EXPRUNTIMELOOKUP; - } - bool doesMethodHavePatchpoints() { return (optMethodFlags & OMF_HAS_PATCHPOINT) != 0; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 9f74e24a2702df..5dc2ae30f0f6b5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -64,6 +64,19 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (!doesMethodHaveExpRuntimeLookup()) { + +#ifdef DEBUG + for (BasicBlock* block : Blocks()) + { + for (Statement* stmt : block->Statements()) + { + for (GenTree* tree : stmt->TreeList()) + { + assert(!tree->IsCall() || (tree->IsCall() && !tree->AsCall()->IsExpRuntimeLookup())); + } + } + } +#endif JITDUMP("Current method doesn't have runtime lookups - bail out.") return result; } @@ -257,6 +270,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; + 
GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); + // Save dictionary slot to a local (to be used by fast path) GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); @@ -273,6 +288,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fallbackBb->bbFlags |= BBF_INTERNAL; GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); + assert(!fallbackCall->IsExpRuntimeLookup()); assert(ctxTree->OperIs(GT_LCL_VAR)); Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); asgFallbackStmt->SetDebugInfo(debugInfo); @@ -285,7 +301,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); fastPathBb->bbFlags |= BBF_INTERNAL; Statement* asgFastPathValueStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), gtCloneExpr(fastPathValue))); + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); asgFastPathValueStmt->SetDebugInfo(debugInfo); fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); gtSetStmtInfo(asgFastPathValueStmt); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 3330ebfcfcfb03..d987042622200c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -5780,24 +5780,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) GenTreeCall* call; call = tree->AsCall(); - if (call->IsHelperCall()) - { - switch (eeGetHelperNum(call->gtCallMethHnd)) - { - case CORINFO_HELP_RUNTIMEHANDLE_METHOD: - case CORINFO_HELP_RUNTIMEHANDLE_CLASS: - case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG: - case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG: - case CORINFO_HELP_READYTORUN_GENERIC_HANDLE: - // These helpers a hoistable and are extremely slow. 
Also, they introduce a control flow in a - // late phase - costEx *= 2; - costSz *= 3; - break; - default: - break; - } - } // Evaluate the arguments diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index e9c6d5b27fa811..b62ffc4e5a6752 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -1260,7 +1260,6 @@ PhaseStatus Compiler::fgTransformIndirectCalls() } clearMethodHasFatPointer(); - clearMethodHasExpRuntimeLookup(); } else { From b18b12e3228536060f21aa34624aa7a48605f7e1 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 3 Mar 2023 08:40:11 +0100 Subject: [PATCH 30/63] fix assert --- src/coreclr/jit/gentree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d987042622200c..bf8a6ef3b92689 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -5780,7 +5780,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) GenTreeCall* call; call = tree->AsCall(); - // Evaluate the arguments lvl2 = gtSetCallArgsOrder(&call->gtArgs, /* lateArgs */ false, &costEx, &costSz); From 5eee49bf7e535c6b63a6bbdd0dc8900b5ba3dfd9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 3 Mar 2023 10:58:55 +0100 Subject: [PATCH 31/63] move to separate file + fix some diff regressions --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/flowgraph.cpp | 451 ----------------------------- src/coreclr/jit/runtimelookup.cpp | 461 ++++++++++++++++++++++++++++++ 3 files changed, 462 insertions(+), 451 deletions(-) create mode 100644 src/coreclr/jit/runtimelookup.cpp diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 50e4706c329f1f..4ae597be53088f 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -125,6 +125,7 @@ set( JIT_SOURCES hwintrinsic.cpp hostallocator.cpp ifconversion.cpp + runtimelookup.cpp indirectcalltransformer.cpp importercalls.cpp 
importer.cpp diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 5dc2ae30f0f6b5..1fc11f2be50b77 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -52,457 +52,6 @@ static bool blockNeedsGCPoll(BasicBlock* block) return blockMayNeedGCPoll; } -//------------------------------------------------------------------------------ -// fgExpandRuntimeLookups : partially expand runtime lookups helper calls -// to add a nullcheck [+ size check] and a fast path -// Returns: -// PhaseStatus indicating what, if anything, was changed. -// -PhaseStatus Compiler::fgExpandRuntimeLookups() -{ - PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - - if (!doesMethodHaveExpRuntimeLookup()) - { - -#ifdef DEBUG - for (BasicBlock* block : Blocks()) - { - for (Statement* stmt : block->Statements()) - { - for (GenTree* tree : stmt->TreeList()) - { - assert(!tree->IsCall() || (tree->IsCall() && !tree->AsCall()->IsExpRuntimeLookup())); - } - } - } -#endif - JITDUMP("Current method doesn't have runtime lookups - bail out.") - return result; - } - - // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag - for (BasicBlock* block : Blocks()) - { - TRAVERSE_BLOCK_AGAIN: - - Statement* prevStmt = nullptr; - for (Statement* const stmt : block->Statements()) - { - for (GenTree* const tree : stmt->TreeList()) - { - // We only need calls with IsExpRuntimeLookup() flag - if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) - { - continue; - } - assert(tree->IsHelperCall()); - - JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(tree), block->bbNum) - DISPTREE(tree) - JITDUMP("\n") - - GenTreeCall* call = tree->AsCall(); - call->ClearExpRuntimeLookup(); - assert(call->gtArgs.CountArgs() == 2); - - if (call->IsTailCall()) - { - assert(!"Unexpected runtime lookup as a tail call"); - continue; - } - - // call(ctx, signature); - GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTree* sigTree = 
call->gtArgs.GetArgByIndex(1)->GetNode(); - - void* signature = nullptr; - if (sigTree->IsCnsIntOrI()) - { - signature = (void*)sigTree->AsIntCon()->IconValue(); - } - else - { - // signature is not a constant (CSE'd?) - let's see if we can access it via VN - if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) - { - signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); - } - else - { - // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we - // want to catch such cases as we really don't want to emit just a fallback call - it's too slow - assert(!"can't restore signature argument value"); - continue; - } - } - assert(signature != nullptr); - - CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; - const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); - assert(lookupFound); - - const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; - if (needsSizeCheck) - { - JITDUMP("dynamic expansion, needs size check.\n") - } - - DebugInfo debugInfo = stmt->GetDebugInfo(); - - assert(runtimeLookup.indirections != 0); - assert(runtimeLookup.testForNull); - - BasicBlockFlags originalFlags = block->bbFlags; - BasicBlock* prevBb = block; - - if (prevStmt == nullptr) - { - JITDUMP("Splitting " FMT_BB " at the beginning.\n", prevBb->bbNum) - block = fgSplitBlockAtBeginning(prevBb); - } - else - { - JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, prevStmt->GetID()) - block = fgSplitBlockAfterStatement(prevBb, prevStmt); - } - - // We split a block, possibly, in the middle - we need to propagate some flags - prevBb->bbFlags = - originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); - block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | - BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); - - // Define a local for the result - const unsigned rtLookupLclNum = 
lvaGrabTemp(true DEBUGARG("runtime lookup")); - lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; - GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - - // Save expression to a local and append as the last statement in prevBb - auto spillExpr = [&](GenTree* expr) -> GenTree* { - if (expr->OperIs(GT_LCL_VAR)) - { - return gtClone(expr); - } - unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); - lvaTable[tmpNum].lvType = expr->TypeGet(); - Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); - asgStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(asgStmt); - fgSetStmtSeq(asgStmt); - return gtNewLclvNode(tmpNum, expr->TypeGet()); - }; - - // if sigTree was not a constant e.g. COMMA(..., CNS)) - spill it - if (!sigTree->IsCnsIntOrI()) - { - spillExpr(sigTree); - } - - // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) - ctxTree = spillExpr(ctxTree); - GenTree* slotPtrTree = gtClone(ctxTree); - GenTree* indOffTree = nullptr; - GenTree* lastIndOfTree = nullptr; - for (WORD i = 0; i < runtimeLookup.indirections; i++) - { - if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) - { - indOffTree = spillExpr(slotPtrTree); - slotPtrTree = gtClone(indOffTree); - } - - // The last indirection could be subject to a size check (dynamic dictionary expansion) - const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; - - if (i != 0) - { - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - if (!isLastIndirectionWithSizeCheck) - { - slotPtrTree->gtFlags |= GTF_IND_INVARIANT; - } - } - - if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) - { - slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); - } - - if (runtimeLookup.offsets[i] != 0) - { - if 
(isLastIndirectionWithSizeCheck) - { - lastIndOfTree = spillExpr(slotPtrTree); - slotPtrTree = gtClone(lastIndOfTree); - } - - slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, - gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); - } - } - - // Non-dynamic expansion case (no size check): - // - // prevBb(BBJ_NONE): [weight: 1.0] - // ... - // - // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (*fastPathValue == null) - // goto fallbackBb; - // - // fastPathBb(BBJ_ALWAYS): [weight: 0.8] - // rtLookupLcl = *fastPathValue; - // goto block; - // - // fallbackBb(BBJ_NONE): [weight: 0.2] - // rtLookupLcl = HelperCall(); - // - // block(...): [weight: 1.0] - // use(rtLookupLcl); - // - - // null-check basic block - BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); - nullcheckBb->bbFlags |= BBF_INTERNAL; - - GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); - fastPathValue->gtFlags |= GTF_IND_NONFAULTING; - - GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); - - // Save dictionary slot to a local (to be used by fast path) - - GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); - nullcheckOp->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - gtSetEvalOrder(nullcheckOp); - Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); - nullcheckStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(nullcheckStmt); - fgSetStmtSeq(nullcheckStmt); - fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); - - // Fallback basic block - BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); - fallbackBb->bbFlags |= BBF_INTERNAL; - - GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); - assert(!fallbackCall->IsExpRuntimeLookup()); - assert(ctxTree->OperIs(GT_LCL_VAR)); - Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); - asgFallbackStmt->SetDebugInfo(debugInfo); - 
fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); - gtSetStmtInfo(asgFallbackStmt); - fgSetStmtSeq(asgFallbackStmt); - gtUpdateTreeAncestorsSideEffects(fallbackCall); - - // Fast-path basic block - BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); - fastPathBb->bbFlags |= BBF_INTERNAL; - Statement* asgFastPathValueStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); - asgFastPathValueStmt->SetDebugInfo(debugInfo); - fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); - gtSetStmtInfo(asgFastPathValueStmt); - fgSetStmtSeq(asgFastPathValueStmt); - - BasicBlock* sizeCheckBb = nullptr; - if (needsSizeCheck) - { - // Dynamic expansion case (sizeCheckBb is added and some preds are changed): - // - // prevBb(BBJ_NONE): [weight: 1.0] - // - // sizeCheckBb(BBJ_COND): [weight: 1.0] - // if (sizeValue <= offsetValue) - // goto fallbackBb; - // ... - // - // nullcheckBb(BBJ_COND): [weight: 0.8] - // if (*fastPathValue == null) - // goto fallbackBb; - // - // fastPathBb(BBJ_ALWAYS): [weight: 0.64] - // rtLookupLcl = *fastPathValue; - // goto block; - // - // fallbackBb(BBJ_NONE): [weight: 0.36] - // rtLookupLcl = HelperCall(); - // - // block(...): [weight: 1.0] - // use(rtLookupLcl); - // - - sizeCheckBb = fgNewBBbefore(BBJ_COND, nullcheckBb, true); - sizeCheckBb->bbFlags |= BBF_INTERNAL; - - // sizeValue = dictionary[pRuntimeLookup->sizeOffset] - GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); - assert(lastIndOfTree != nullptr); - GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); - GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); - sizeValue->gtFlags |= GTF_IND_NONFAULTING; - - // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] - GenTree* offsetValue = - gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); - GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); - 
sizeCheck->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - gtSetEvalOrder(sizeCheck); - Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); - sizeCheckStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(sizeCheckStmt); - fgSetStmtSeq(sizeCheckStmt); - fgInsertStmtAtEnd(sizeCheckBb, sizeCheckStmt); - } - - // Replace call with rtLookupLclNum local - call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); - gtUpdateTreeAncestorsSideEffects(call); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); - - // Connect all new blocks together - fgRemoveRefPred(block, prevBb); - fgAddRefPred(block, fastPathBb); - fgAddRefPred(block, fallbackBb); - nullcheckBb->bbJumpDest = fallbackBb; - fastPathBb->bbJumpDest = block; - - if (needsSizeCheck) - { - // Size check is the first block after prevBb - fgAddRefPred(sizeCheckBb, prevBb); - - // sizeCheckBb flows into nullcheckBb in case if the size check passes - fgAddRefPred(nullcheckBb, sizeCheckBb); - - // fallbackBb is reachable from both nullcheckBb and sizeCheckBb - fgAddRefPred(fallbackBb, nullcheckBb); - fgAddRefPred(fallbackBb, sizeCheckBb); - - // fastPathBb is only reachable from successful nullcheckBb - fgAddRefPred(fastPathBb, nullcheckBb); - - // sizeCheckBb fails - jump to fallbackBb - sizeCheckBb->bbJumpDest = fallbackBb; - } - else - { - // nullcheckBb is the first block after prevBb - fgAddRefPred(nullcheckBb, prevBb); - - // No size check, nullcheckBb jumps to fast path - fgAddRefPred(fastPathBb, nullcheckBb); - - // fallbackBb is only reachable from nullcheckBb (jump destination) - fgAddRefPred(fallbackBb, nullcheckBb); - } - - // Some quick validation - assert(prevBb->NumSucc() == 1); - if (needsSizeCheck) - { - assert(prevBb->GetSucc(0) == sizeCheckBb); - assert(sizeCheckBb->NumSucc() == 2); - } - else - { - assert(prevBb->GetSucc(0) == nullcheckBb); - } - assert(nullcheckBb->NumSucc() == 2); - assert(fastPathBb->NumSucc() == 1); - assert(fallbackBb->NumSucc() == 
1); - assert(fastPathBb->GetSucc(0) == block); - assert(fallbackBb->GetSucc(0) == block); - - // Re-distribute weights (see '[weight: X]' on the diagrams above) - - block->inheritWeight(prevBb); - - if (needsSizeCheck) - { - sizeCheckBb->inheritWeight(prevBb); - - // 80% chance we pass nullcheck - nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); - - // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - - // 100-64=36% chance we fail either nullcheck or sizecheck - fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); - } - else - { - nullcheckBb->inheritWeight(prevBb); - - // 80% chance we pass nullcheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - - // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) - fallbackBb->inheritWeightPercentage(nullcheckBb, 20); - } - - // Update loop info - if (prevBb->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) - { - nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; - fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; - if (needsSizeCheck) - { - sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; - } - - // Update lpBottom after block split - if (optLoopTable[prevBb->bbNatLoopNum].lpBottom == prevBb) - { - optLoopTable[prevBb->bbNatLoopNum].lpBottom = block; - } - } - - // All blocks are expected to be in the same EH region - assert(BasicBlock::sameEHRegion(prevBb, block)); - assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); - assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); - if (needsSizeCheck) - { - assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb)); - } - - // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. 
- // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible - // for that - result = PhaseStatus::MODIFIED_EVERYTHING; - block = prevBb; - goto TRAVERSE_BLOCK_AGAIN; - } - prevStmt = stmt; - } - } - - if (result == PhaseStatus::MODIFIED_EVERYTHING) - { - if (opts.OptimizationEnabled()) - { - fgReorderBlocks(/* useProfileData */ false); - fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); - } - -#ifdef DEBUG - if (verbose) - { - printf("\n*************** After fgExpandRuntimeLookups()\n"); - fgDispBasicBlocks(true); - } -#endif - } - - return result; -} - //------------------------------------------------------------------------------ // fgInsertGCPolls : Insert GC polls for basic blocks containing calls to methods // with SuppressGCTransitionAttribute. diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp new file mode 100644 index 00000000000000..30641f2e8f0880 --- /dev/null +++ b/src/coreclr/jit/runtimelookup.cpp @@ -0,0 +1,461 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +//------------------------------------------------------------------------------ +// fgExpandRuntimeLookups : partially expand runtime lookups helper calls +// to add a nullcheck [+ size check] and a fast path +// Returns: +// PhaseStatus indicating what, if anything, was changed. 
+// +PhaseStatus Compiler::fgExpandRuntimeLookups() +{ + PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; + + if (!doesMethodHaveExpRuntimeLookup()) + { + +#ifdef DEBUG + for (BasicBlock* block : Blocks()) + { + for (Statement* stmt : block->Statements()) + { + for (GenTree* tree : stmt->TreeList()) + { + assert(!tree->IsCall() || (tree->IsCall() && !tree->AsCall()->IsExpRuntimeLookup())); + } + } + } +#endif + JITDUMP("Current method doesn't have runtime lookups - bail out.") + return result; + } + + // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag + for (BasicBlock* block : Blocks()) + { + TRAVERSE_BLOCK_AGAIN: + + Statement* prevStmt = nullptr; + for (Statement* const stmt : block->Statements()) + { + for (GenTree* const tree : stmt->TreeList()) + { + // We only need calls with IsExpRuntimeLookup() flag + if (!tree->IsCall() || !tree->AsCall()->IsExpRuntimeLookup()) + { + continue; + } + assert(tree->IsHelperCall()); + + JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(tree), block->bbNum) + DISPTREE(tree) + JITDUMP("\n") + + GenTreeCall* call = tree->AsCall(); + call->ClearExpRuntimeLookup(); + assert(call->gtArgs.CountArgs() == 2); + + if (call->IsTailCall()) + { + assert(!"Unexpected runtime lookup as a tail call"); + continue; + } + + // call(ctx, signature); + GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); + + void* signature = nullptr; + if (sigTree->IsCnsIntOrI()) + { + signature = (void*)sigTree->AsIntCon()->IconValue(); + } + else + { + // signature is not a constant (CSE'd?) - let's see if we can access it via VN + if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) + { + signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); + } + else + { + // Technically, it is possible (e.g. 
it was CSE'd and then VN was erased), but for Debug mode we + // want to catch such cases as we really don't want to emit just a fallback call - it's too slow + assert(!"can't restore signature argument value"); + continue; + } + } + assert(signature != nullptr); + + CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; + const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); + assert(lookupFound); + + const bool needsSizeCheck = runtimeLookup.sizeOffset != CORINFO_NO_SIZE_CHECK; + if (needsSizeCheck) + { + JITDUMP("dynamic expansion, needs size check.\n") + } + + DebugInfo debugInfo = stmt->GetDebugInfo(); + + assert(runtimeLookup.indirections != 0); + assert(runtimeLookup.testForNull); + + BasicBlockFlags originalFlags = block->bbFlags; + BasicBlock* prevBb = block; + + if (prevStmt == nullptr || opts.OptimizationDisabled()) + { + JITDUMP("Splitting " FMT_BB " at the beginning.\n", prevBb->bbNum) + block = fgSplitBlockAtBeginning(prevBb); + } + else + { + JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, prevStmt->GetID()) + block = fgSplitBlockAfterStatement(prevBb, prevStmt); + } + + // We split a block, possibly, in the middle - we need to propagate some flags + prevBb->bbFlags = + originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | + BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + + // Define a local for the result + const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); + lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; + GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + + // Save expression to a local and append as the last statement in prevBb + auto spillExpr = [&](GenTree* expr) -> GenTree* { + if (expr->OperIs(GT_LCL_VAR)) + { + return gtClone(expr); + } + unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); + 
lvaTable[tmpNum].lvType = expr->TypeGet(); + Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); + asgStmt->SetDebugInfo(debugInfo); + gtSetStmtInfo(asgStmt); + fgSetStmtSeq(asgStmt); + return gtNewLclvNode(tmpNum, expr->TypeGet()); + }; + + // if sigTree was not a constant e.g. COMMA(..., CNS) - spill it + if (!sigTree->IsCnsIntOrI()) + { + spillExpr(sigTree); + } + + // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) + ctxTree = spillExpr(ctxTree); + GenTree* slotPtrTree = gtClone(ctxTree); + GenTree* indOffTree = nullptr; + GenTree* lastIndOfTree = nullptr; + for (WORD i = 0; i < runtimeLookup.indirections; i++) + { + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + indOffTree = spillExpr(slotPtrTree); + slotPtrTree = gtClone(indOffTree); + } + + // The last indirection could be subject to a size check (dynamic dictionary expansion) + const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; + + if (i != 0) + { + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; + if (!isLastIndirectionWithSizeCheck) + { + slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + } + } + + if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) + { + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); + } + + if (runtimeLookup.offsets[i] != 0) + { + if (isLastIndirectionWithSizeCheck) + { + lastIndOfTree = spillExpr(slotPtrTree); + slotPtrTree = gtClone(lastIndOfTree); + } + + slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, + gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); + } + } + + // Non-dynamic expansion case (no size check): + // + // prevBb(BBJ_NONE): [weight: 1.0] + // ...
+ // + // nullcheckBb(BBJ_COND): [weight: 1.0] + // if (*fastPathValue == null) + // goto fallbackBb; + // + // fastPathBb(BBJ_ALWAYS): [weight: 0.8] + // rtLookupLcl = *fastPathValue; + // goto block; + // + // fallbackBb(BBJ_NONE): [weight: 0.2] + // rtLookupLcl = HelperCall(); + // + // block(...): [weight: 1.0] + // use(rtLookupLcl); + // + + // null-check basic block + BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); + nullcheckBb->bbFlags |= BBF_INTERNAL; + + GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); + fastPathValue->gtFlags |= GTF_IND_NONFAULTING; + + GenTree* fastPathValueClone = + opts.OptimizationEnabled() ? fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); + + // Save dictionary slot to a local (to be used by fast path) + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); + nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; + gtSetEvalOrder(nullcheckOp); + Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); + nullcheckStmt->SetDebugInfo(debugInfo); + gtSetStmtInfo(nullcheckStmt); + fgSetStmtSeq(nullcheckStmt); + fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); + + // Fallback basic block + BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); + fallbackBb->bbFlags |= BBF_INTERNAL; + + GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); + fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); + gtSetEvalOrder(fallbackCall); + fgMorphCall(fallbackCall); + assert(!fallbackCall->IsExpRuntimeLookup()); + assert(ctxTree->OperIs(GT_LCL_VAR)); + Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); + asgFallbackStmt->SetDebugInfo(debugInfo); + fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); + gtSetStmtInfo(asgFallbackStmt); + fgSetStmtSeq(asgFallbackStmt); + gtUpdateTreeAncestorsSideEffects(fallbackCall); + + // Fast-path basic block 
+ BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); + fastPathBb->bbFlags |= BBF_INTERNAL; + Statement* asgFastPathValueStmt = + fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); + asgFastPathValueStmt->SetDebugInfo(debugInfo); + fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); + gtSetStmtInfo(asgFastPathValueStmt); + fgSetStmtSeq(asgFastPathValueStmt); + + BasicBlock* sizeCheckBb = nullptr; + if (needsSizeCheck) + { + // Dynamic expansion case (sizeCheckBb is added and some preds are changed): + // + // prevBb(BBJ_NONE): [weight: 1.0] + // + // sizeCheckBb(BBJ_COND): [weight: 1.0] + // if (sizeValue <= offsetValue) + // goto fallbackBb; + // ... + // + // nullcheckBb(BBJ_COND): [weight: 0.8] + // if (*fastPathValue == null) + // goto fallbackBb; + // + // fastPathBb(BBJ_ALWAYS): [weight: 0.64] + // rtLookupLcl = *fastPathValue; + // goto block; + // + // fallbackBb(BBJ_NONE): [weight: 0.36] + // rtLookupLcl = HelperCall(); + // + // block(...): [weight: 1.0] + // use(rtLookupLcl); + // + + sizeCheckBb = fgNewBBbefore(BBJ_COND, nullcheckBb, true); + sizeCheckBb->bbFlags |= BBF_INTERNAL; + + // sizeValue = dictionary[pRuntimeLookup->sizeOffset] + GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); + assert(lastIndOfTree != nullptr); + GenTree* sizeValueOffset = gtNewOperNode(GT_ADD, TYP_I_IMPL, lastIndOfTree, sizeOffset); + GenTree* sizeValue = gtNewOperNode(GT_IND, TYP_I_IMPL, sizeValueOffset); + sizeValue->gtFlags |= GTF_IND_NONFAULTING; + + // sizeCheck fails if sizeValue <= pRuntimeLookup->offsets[i] + GenTree* offsetValue = + gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); + GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); + sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; + gtSetEvalOrder(sizeCheck); + Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); + 
sizeCheckStmt->SetDebugInfo(debugInfo); + gtSetStmtInfo(sizeCheckStmt); + fgSetStmtSeq(sizeCheckStmt); + fgInsertStmtAtEnd(sizeCheckBb, sizeCheckStmt); + } + + // Replace call with rtLookupLclNum local + call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); + gtUpdateTreeAncestorsSideEffects(call); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); + + // Connect all new blocks together + fgRemoveRefPred(block, prevBb); + fgAddRefPred(block, fastPathBb); + fgAddRefPred(block, fallbackBb); + nullcheckBb->bbJumpDest = fallbackBb; + fastPathBb->bbJumpDest = block; + + if (needsSizeCheck) + { + // Size check is the first block after prevBb + fgAddRefPred(sizeCheckBb, prevBb); + + // sizeCheckBb flows into nullcheckBb in case if the size check passes + fgAddRefPred(nullcheckBb, sizeCheckBb); + + // fallbackBb is reachable from both nullcheckBb and sizeCheckBb + fgAddRefPred(fallbackBb, nullcheckBb); + fgAddRefPred(fallbackBb, sizeCheckBb); + + // fastPathBb is only reachable from successful nullcheckBb + fgAddRefPred(fastPathBb, nullcheckBb); + + // sizeCheckBb fails - jump to fallbackBb + sizeCheckBb->bbJumpDest = fallbackBb; + } + else + { + // nullcheckBb is the first block after prevBb + fgAddRefPred(nullcheckBb, prevBb); + + // No size check, nullcheckBb jumps to fast path + fgAddRefPred(fastPathBb, nullcheckBb); + + // fallbackBb is only reachable from nullcheckBb (jump destination) + fgAddRefPred(fallbackBb, nullcheckBb); + } + + // Some quick validation + assert(prevBb->NumSucc() == 1); + if (needsSizeCheck) + { + assert(prevBb->GetSucc(0) == sizeCheckBb); + assert(sizeCheckBb->NumSucc() == 2); + } + else + { + assert(prevBb->GetSucc(0) == nullcheckBb); + } + assert(nullcheckBb->NumSucc() == 2); + assert(fastPathBb->NumSucc() == 1); + assert(fallbackBb->NumSucc() == 1); + assert(fastPathBb->GetSucc(0) == block); + assert(fallbackBb->GetSucc(0) == block); + + // Re-distribute weights (see '[weight: X]' on the diagrams above) + + 
block->inheritWeight(prevBb); + + if (needsSizeCheck) + { + sizeCheckBb->inheritWeight(prevBb); + + // 80% chance we pass nullcheck + nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); + + // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck + fastPathBb->inheritWeightPercentage(nullcheckBb, 80); + + // 100-64=36% chance we fail either nullcheck or sizecheck + fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); + } + else + { + nullcheckBb->inheritWeight(prevBb); + + // 80% chance we pass nullcheck + fastPathBb->inheritWeightPercentage(nullcheckBb, 80); + + // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) + fallbackBb->inheritWeightPercentage(nullcheckBb, 20); + } + + // Update loop info + if (prevBb->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) + { + nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; + fallbackBb->bbNatLoopNum = prevBb->bbNatLoopNum; + if (needsSizeCheck) + { + sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; + } + + // Update lpBottom after block split + if (optLoopTable[prevBb->bbNatLoopNum].lpBottom == prevBb) + { + optLoopTable[prevBb->bbNatLoopNum].lpBottom = block; + } + } + + // All blocks are expected to be in the same EH region + assert(BasicBlock::sameEHRegion(prevBb, block)); + assert(BasicBlock::sameEHRegion(prevBb, nullcheckBb)); + assert(BasicBlock::sameEHRegion(prevBb, fastPathBb)); + if (needsSizeCheck) + { + assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb)); + } + + // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. 
+ // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible + // for that + result = PhaseStatus::MODIFIED_EVERYTHING; + block = prevBb; + goto TRAVERSE_BLOCK_AGAIN; + } + prevStmt = stmt; + } + } + + if (result == PhaseStatus::MODIFIED_EVERYTHING) + { + if (opts.OptimizationEnabled()) + { + fgReorderBlocks(/* useProfileData */ false); + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + } + +#ifdef DEBUG + if (verbose) + { + printf("\n*************** After fgExpandRuntimeLookups()\n"); + fgDispBasicBlocks(true); + } +#endif + } + + return result; +} From 2ff37996df1b00fb597eaaf030042b808615373a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 3 Mar 2023 14:15:34 +0100 Subject: [PATCH 32/63] Clean up, address some of the feedback --- src/coreclr/jit/importer.cpp | 4 ++++ src/coreclr/jit/runtimelookup.cpp | 34 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 92227dd5ed430a..b6626a553dac4b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1887,6 +1887,10 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. + argNode->gtFlags |= GTF_DONT_CSE; + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
// We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 impInlineRoot()->setMethodHasExpRuntimeLookup(); diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 30641f2e8f0880..304a18168c0ea2 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -20,6 +20,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { #ifdef DEBUG + // To make sure doesMethodHaveExpRuntimeLookup() is not lying to us: for (BasicBlock* block : Blocks()) { for (Statement* stmt : block->Statements()) @@ -62,7 +63,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (call->IsTailCall()) { - assert(!"Unexpected runtime lookup as a tail call"); + // It is very unlikely to happen but just in case continue; } @@ -71,25 +72,18 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); void* signature = nullptr; - if (sigTree->IsCnsIntOrI()) + if (sigTree->gtEffectiveVal()->IsCnsIntOrI()) { - signature = (void*)sigTree->AsIntCon()->IconValue(); + signature = (void*)sigTree->gtEffectiveVal()->AsIntCon()->IconValue(); } else { - // signature is not a constant (CSE'd?) - let's see if we can access it via VN - if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) - { - signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); - } - else - { - // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we - // want to catch such cases as we really don't want to emit just a fallback call - it's too slow - assert(!"can't restore signature argument value"); - continue; - } + // It should be still possible to restore signature and compileTimeHandle from VN + // but let's see if it's worth the effort. 
Signature node is marked as DONT_CSE in importer + assert(!"can't restore signature argument value"); + continue; } + assert(signature != nullptr); CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; @@ -110,7 +104,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlockFlags originalFlags = block->bbFlags; BasicBlock* prevBb = block; - if (prevStmt == nullptr || opts.OptimizationDisabled()) + if (prevStmt == nullptr) { JITDUMP("Splitting " FMT_BB " at the beginning.\n", prevBb->bbNum) block = fgSplitBlockAtBeginning(prevBb); @@ -241,8 +235,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); fallbackBb->bbFlags |= BBF_INTERNAL; - GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); - fallbackCall->gtArgs.GetArgByIndex(0)->SetLateNode(gtClone(ctxTree)); + GenTree* signatureArg = + gtNewIconEmbHndNode(signature, nullptr, GTF_ICON_GLOBAL_PTR, + (void*)sigTree->gtEffectiveVal()->AsIntCon()->gtCompileTimeHandle); + fgUpdateConstTreeValueNumber(signatureArg); + GenTreeCall* fallbackCall = + gtNewHelperCallNode(runtimeLookup.helper, TYP_I_IMPL, gtClone(ctxTree), signatureArg); gtSetEvalOrder(fallbackCall); fgMorphCall(fallbackCall); assert(!fallbackCall->IsExpRuntimeLookup()); From 11cac118ae1907440417fb76ba06c2489dc73636 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 3 Mar 2023 19:54:42 +0100 Subject: [PATCH 33/63] Address feedback [WIP] --- src/coreclr/jit/compiler.h | 2 ++ src/coreclr/jit/compiler.hpp | 21 +++++++++++++++++++++ src/coreclr/jit/gentree.cpp | 25 +++++++++++++++++++++++++ src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/lclmorph.cpp | 29 ++++------------------------- src/coreclr/jit/runtimelookup.cpp | 11 ++++++----- 6 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 381e2ace3f69c8..14fafe46e54439 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2929,6 +2929,8 @@ class Compiler 
GenTreeFlags GenTreeFlags = GTF_SIDE_EFFECT, bool ignoreRoot = false); + void gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint); + // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but // complicates the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 94dcf70963a413..8cec0408b70902 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1610,6 +1610,27 @@ inline void GenTree::BashToZeroConst(var_types type) } } +//------------------------------------------------------------------------ +// BashToLclVar: Bash node to a LCL_VAR. +// +// Arguments: +// comp - compiler object +// lclNum - the local's number +// +// Return Value: +// The bashed node. +// +inline GenTreeLclVar* GenTree::BashToLclVar(Compiler* comp, unsigned lclNum) +{ + LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); + + ChangeOper(GT_LCL_VAR); + ChangeType(varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : genActualType(varDsc)); + AsLclVar()->SetLclNum(lclNum); + + return AsLclVar(); +} + /***************************************************************************** * * Returns true if the node is of the "ovf" variety, for example, add.ovf.i1. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index bf8a6ef3b92689..cabfa1b44f62ea 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16212,6 +16212,31 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S return true; } +void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint) +{ + class SideEffectSeparator final : public GenTreeVisitor + { + public: + enum + { + DoPreOrder = true, + UseExecutionOrder = true + }; + + SideEffectSeparator(Compiler* compiler) : GenTreeVisitor(compiler) + { + // TODO: + } + + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) + { + return Compiler::WALK_SKIP_SUBTREES; + } + }; + SideEffectSeparator extractor(this); + extractor.WalkTree(stmt->GetRootNodePointer(), nullptr); +} + //------------------------------------------------------------------------ // gtExtractSideEffList: Extracts side effects from the given expression. // diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index d44e6ad138b5c5..227c8de406be2f 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1943,8 +1943,8 @@ struct GenTree template void BashToConst(T value, var_types type = TYP_UNDEF); - void BashToZeroConst(var_types type); + GenTreeLclVar* BashToLclVar(Compiler* comp, unsigned lclNum); #if NODEBASH_STATS static void RecordOperBashing(genTreeOps operOld, genTreeOps operNew); diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 78994398456b0a..79411d559a87b0 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1154,7 +1154,7 @@ class LocalAddressVisitor final : public GenTreeVisitor case IndirTransform::BitCast: indir->ChangeOper(GT_BITCAST); - lclNode = BashToLclVar(indir->gtGetOp1(), lclNum); + lclNode = indir->gtGetOp1()->BashToLclVar(m_compiler, lclNum); break; #ifdef FEATURE_HW_INTRINSICS @@ -1166,7 +1166,7 @@ class LocalAddressVisitor final : public 
GenTreeVisitor { GenTree* hwiNode = nullptr; var_types elementType = indir->TypeGet(); - lclNode = BashToLclVar(indir->gtGetOp1(), lclNum); + lclNode = indir->gtGetOp1()->BashToLclVar(m_compiler, lclNum); if (elementType == TYP_FLOAT) { @@ -1195,7 +1195,7 @@ class LocalAddressVisitor final : public GenTreeVisitor GenTree* hwiNode = nullptr; var_types elementType = indir->TypeGet(); - lclNode = BashToLclVar(indir, lclNum); + lclNode = indir->BashToLclVar(m_compiler, lclNum); GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); GenTree* elementNode = user->gtGetOp2(); @@ -1252,7 +1252,7 @@ class LocalAddressVisitor final : public GenTreeVisitor assert(genTypeSize(varDsc) >= genTypeSize(indir)); assert(!isDef); - lclNode = BashToLclVar(indir->gtGetOp1(), lclNum); + lclNode = indir->gtGetOp1()->BashToLclVar(m_compiler, lclNum); *val.Use() = m_compiler->gtNewCastNode(genActualType(indir), lclNode, false, indir->TypeGet()); break; @@ -1670,27 +1670,6 @@ class LocalAddressVisitor final : public GenTreeVisitor return (user == nullptr) || (user->OperIs(GT_COMMA) && (user->AsOp()->gtGetOp1() == node)); } - //------------------------------------------------------------------------ - // BashToLclVar: Bash node to a LCL_VAR. - // - // Arguments: - // node - the node to bash - // lclNum - the local's number - // - // Return Value: - // The bashed node. - // - GenTreeLclVar* BashToLclVar(GenTree* node, unsigned lclNum) - { - LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - - node->ChangeOper(GT_LCL_VAR); - node->ChangeType(varDsc->lvNormalizeOnLoad() ? 
varDsc->TypeGet() : genActualType(varDsc)); - node->AsLclVar()->SetLclNum(lclNum); - - return node->AsLclVar(); - } - void SequenceLocal(GenTreeLclVarCommon* lcl) { if (m_sequencer != nullptr) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 304a18168c0ea2..1a8bff161d840e 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -121,6 +121,11 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + // We've just split a block (e.g. in the middle of it) into two blocks. + // We have to do the same for the current statement - move all side effects before the runtime + // lookup to prevBb + gtSplitTree(block, stmt, call); + // Define a local for the result const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; @@ -128,10 +133,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Save expression to a local and append as the last statement in prevBb auto spillExpr = [&](GenTree* expr) -> GenTree* { - if (expr->OperIs(GT_LCL_VAR)) - { - return gtClone(expr); - } unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); lvaTable[tmpNum].lvType = expr->TypeGet(); Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); @@ -313,7 +314,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } // Replace call with rtLookupLclNum local - call->ReplaceWith(gtNewLclvNode(rtLookupLclNum, call->TypeGet()), this); + call->BashToLclVar(this, rtLookupLclNum); gtUpdateTreeAncestorsSideEffects(call); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); From 936c7d2a09ce9e80aa5fba0411e7e75488fd3e94 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 4 Mar 2023 01:38:11 +0100 Subject: [PATCH 34/63] Implement gtSplitTree --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.cpp | 142 
++++++++++++++++++++++++++++-- src/coreclr/jit/runtimelookup.cpp | 4 +- 3 files changed, 139 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 14fafe46e54439..b35c6b2b49ec0c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2929,7 +2929,7 @@ class Compiler GenTreeFlags GenTreeFlags = GTF_SIDE_EFFECT, bool ignoreRoot = false); - void gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint); + void gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse); // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index cabfa1b44f62ea..97fb65bdab0106 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16212,29 +16212,157 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S return true; } -void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint) +void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) { - class SideEffectSeparator final : public GenTreeVisitor + class Splitter final : public GenTreeVisitor { + BasicBlock* m_bb; + Statement* m_splitStmt; + GenTree* m_splitNode; + + struct UseInfo + { + GenTree** Use; + GenTree* User; + }; + ArrayStack m_useStack; + public: enum { DoPreOrder = true, + DoPostOrder = true, UseExecutionOrder = true }; - SideEffectSeparator(Compiler* compiler) : GenTreeVisitor(compiler) + Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode) : GenTreeVisitor(compiler), m_bb(bb), m_splitStmt(stmt), m_splitNode(splitNode), 
m_useStack(compiler->getAllocator(CMK_ArrayStack)) { - // TODO: } + Statement* FirstStatement = nullptr; + GenTree** SplitNodeUse = nullptr; + fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { - return Compiler::WALK_SKIP_SUBTREES; + m_useStack.Push(UseInfo{ use, user }); + return WALK_CONTINUE; + } + + fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) + { + if (*use == m_splitNode) + { + GenTree* ancestor = *use; + while (!m_useStack.Empty()) + { + while (m_useStack.Top(0).User == ancestor) + { + SplitOutUse(m_useStack.Pop()); + } + + assert(*m_useStack.Top(0).Use == ancestor); + ancestor = m_useStack.Pop().User; + } + + SplitNodeUse = use; + + return WALK_ABORT; + } + + while (m_useStack.Top(0).Use != use) + { + m_useStack.Pop(); + } + + return WALK_CONTINUE; + } + + private: + void SplitOutUse(const UseInfo& useInf) + { + GenTree** use = useInf.Use; + GenTree* user = useInf.User; + + if ((*use)->IsInvariant()) + { + return; + } + + assert((user == nullptr) || !user->OperIs(GT_ADDR)); + + if ((user != nullptr) && user->OperIs(GT_ASG) && (use == &user->AsOp()->gtOp1)) + { + // ASGs are special -- the evaluation of the immediate first + // operand happens as part of the assignment, but its children + // are still evaluated 'as normal'. + // + // ADDR is the same but we never expect to have to handle it + // here -- it is a unary node, so it cannot be a sibling to the + // node we are splitting out. 
+ // + assert((*use)->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); + if ((*use)->OperIsUnary()) + { + user = *use; + use = &(*use)->AsUnOp()->gtOp1; + } + else + { + return; + } + } + + Statement* stmt = nullptr; + if (!(*use)->IsValue() || (*use)->OperIs(GT_ASG) || (user == nullptr) || (user->OperIs(GT_COMMA) && user->gtGetOp1() == *use)) + { + GenTree* sideEffects = nullptr; + m_compiler->gtExtractSideEffList(*use, &sideEffects); + if (sideEffects != nullptr) + { + stmt = m_compiler->fgNewStmtFromTree(sideEffects, m_splitStmt->GetDebugInfo()); + } + *use = m_compiler->gtNewNothingNode(); + } + else + { + unsigned lclNum = m_compiler->lvaGrabTemp(true, "Spilling to split statement for tree"); + GenTree* asg = m_compiler->gtNewTempAssign(lclNum, *use); + stmt = m_compiler->fgNewStmtFromTree(asg, m_splitStmt->GetDebugInfo()); + *use = m_compiler->gtNewLclvNode(lclNum, genActualType(*use)); + } + + if (stmt != nullptr) + { + if (FirstStatement == nullptr) + { + FirstStatement = m_splitStmt; + } + + m_compiler->fgInsertStmtBefore(m_bb, FirstStatement, stmt); + FirstStatement = stmt; + } } }; - SideEffectSeparator extractor(this); - extractor.WalkTree(stmt->GetRootNodePointer(), nullptr); + + GenTree* rootNode = stmt->GetRootNode(); + if (rootNode == splitPoint) + { + *firstNewStmt = nullptr; + *splitNodeUse = stmt->GetRootNodePointer(); + return; + } + + if (rootNode->OperIs(GT_ASG) && rootNode->gtGetOp1()->OperIs(GT_LCL_VAR) && rootNode->gtGetOp2() == splitPoint) + { + *firstNewStmt = nullptr; + *splitNodeUse = &rootNode->AsOp()->gtOp2; + return; + } + + Splitter splitter(this, block, stmt, splitPoint); + splitter.WalkTree(stmt->GetRootNodePointer(), nullptr); + *firstNewStmt = splitter.FirstStatement; + *splitNodeUse = splitter.SplitNodeUse; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 1a8bff161d840e..ec105f2a6932a8 100644 --- 
a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -124,7 +124,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // We've just split a block (e.g. in the middle of it) into two blocks. // We have to do the same for the current statement - move all side effects before the runtime // lookup to prevBb - gtSplitTree(block, stmt, call); + Statement* firstNewStmt; + GenTree** callUse; + gtSplitTree(block, stmt, call, &firstNewStmt, &callUse); // Define a local for the result const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); From a982e68fc0604432dfeb59c8f3df5229eae1c6f7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 4 Mar 2023 13:31:05 +0100 Subject: [PATCH 35/63] Test --- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/gentree.cpp | 39 ++++++++++------- src/coreclr/jit/runtimelookup.cpp | 71 +++++++++++++++---------------- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b35c6b2b49ec0c..c5fa280df28dfc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2929,7 +2929,8 @@ class Compiler GenTreeFlags GenTreeFlags = GTF_SIDE_EFFECT, bool ignoreRoot = false); - void gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse); + void gtSplitTree( + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitPointUse); // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the // static field contain an object pointer to the boxed struct. 
This simplifies the GC implementation...but diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 97fb65bdab0106..2994f5ac10ec94 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16212,18 +16212,19 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S return true; } -void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) +void Compiler::gtSplitTree( + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) { class Splitter final : public GenTreeVisitor { BasicBlock* m_bb; - Statement* m_splitStmt; - GenTree* m_splitNode; + Statement* m_splitStmt; + GenTree* m_splitNode; struct UseInfo { GenTree** Use; - GenTree* User; + GenTree* User; }; ArrayStack m_useStack; @@ -16235,16 +16236,21 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi UseExecutionOrder = true }; - Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode) : GenTreeVisitor(compiler), m_bb(bb), m_splitStmt(stmt), m_splitNode(splitNode), m_useStack(compiler->getAllocator(CMK_ArrayStack)) + Splitter(Compiler* compiler, BasicBlock* bb, Statement* stmt, GenTree* splitNode) + : GenTreeVisitor(compiler) + , m_bb(bb) + , m_splitStmt(stmt) + , m_splitNode(splitNode) + , m_useStack(compiler->getAllocator(CMK_ArrayStack)) { } Statement* FirstStatement = nullptr; - GenTree** SplitNodeUse = nullptr; + GenTree** SplitNodeUse = nullptr; fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { - m_useStack.Push(UseInfo{ use, user }); + m_useStack.Push(UseInfo{use, user}); return WALK_CONTINUE; } @@ -16280,8 +16286,8 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi private: void SplitOutUse(const UseInfo& useInf) { - GenTree** use = useInf.Use; - GenTree* user = useInf.User; + GenTree** use = useInf.Use; + GenTree* 
user = useInf.User; if ((*use)->IsInvariant()) { @@ -16304,7 +16310,7 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi if ((*use)->OperIsUnary()) { user = *use; - use = &(*use)->AsUnOp()->gtOp1; + use = &(*use)->AsUnOp()->gtOp1; } else { @@ -16313,7 +16319,8 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi } Statement* stmt = nullptr; - if (!(*use)->IsValue() || (*use)->OperIs(GT_ASG) || (user == nullptr) || (user->OperIs(GT_COMMA) && user->gtGetOp1() == *use)) + if (!(*use)->IsValue() || (*use)->OperIs(GT_ASG) || (user == nullptr) || + (user->OperIs(GT_COMMA) && user->gtGetOp1() == *use)) { GenTree* sideEffects = nullptr; m_compiler->gtExtractSideEffList(*use, &sideEffects); @@ -16326,9 +16333,9 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi else { unsigned lclNum = m_compiler->lvaGrabTemp(true, "Spilling to split statement for tree"); - GenTree* asg = m_compiler->gtNewTempAssign(lclNum, *use); - stmt = m_compiler->fgNewStmtFromTree(asg, m_splitStmt->GetDebugInfo()); - *use = m_compiler->gtNewLclvNode(lclNum, genActualType(*use)); + GenTree* asg = m_compiler->gtNewTempAssign(lclNum, *use); + stmt = m_compiler->fgNewStmtFromTree(asg, m_splitStmt->GetDebugInfo()); + *use = m_compiler->gtNewLclvNode(lclNum, genActualType(*use)); } if (stmt != nullptr) @@ -16337,7 +16344,9 @@ void Compiler::gtSplitTree(BasicBlock* block, Statement* stmt, GenTree* splitPoi { FirstStatement = m_splitStmt; } - + m_compiler->gtUpdateStmtSideEffects(stmt); + m_compiler->gtSetStmtInfo(stmt); + m_compiler->fgSetStmtSeq(stmt); m_compiler->fgInsertStmtBefore(m_bb, FirstStatement, stmt); FirstStatement = stmt; } diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index ec105f2a6932a8..8835062f5df758 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -40,8 +40,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() for 
(BasicBlock* block : Blocks()) { TRAVERSE_BLOCK_AGAIN: - - Statement* prevStmt = nullptr; for (Statement* const stmt : block->Statements()) { for (GenTree* const tree : stmt->TreeList()) @@ -68,7 +66,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } // call(ctx, signature); - GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); void* signature = nullptr; @@ -78,12 +75,19 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } else { - // It should be still possible to restore signature and compileTimeHandle from VN - // but let's see if it's worth the effort. Signature node is marked as DONT_CSE in importer - assert(!"can't restore signature argument value"); - continue; + // signature is not a constant (CSE'd?) - let's see if we can access it via VN + if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) + { + signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); + } + else + { + // Technically, it is possible (e.g. 
it was CSE'd and then VN was erased), but for Debug mode we + // want to catch such cases as we really don't want to emit just a fallback call - it's too slow + assert(!"can't restore signature argument value"); + continue; + } } - assert(signature != nullptr); CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; @@ -101,18 +105,23 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); + Statement* firstNewStmt = nullptr; + GenTree** callUse = nullptr; + gtSplitTree(block, stmt, call, &firstNewStmt, &callUse); + BasicBlockFlags originalFlags = block->bbFlags; BasicBlock* prevBb = block; - if (prevStmt == nullptr) + if (stmt == block->firstStmt()) { - JITDUMP("Splitting " FMT_BB " at the beginning.\n", prevBb->bbNum) block = fgSplitBlockAtBeginning(prevBb); } else { - JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, prevStmt->GetID()) - block = fgSplitBlockAfterStatement(prevBb, prevStmt); + assert(stmt->GetPrevStmt() != block->lastStmt()); + JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, + stmt->GetPrevStmt()->GetID()) + block = fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt()); } // We split a block, possibly, in the middle - we need to propagate some flags @@ -121,13 +130,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); - // We've just split a block (e.g. in the middle of it) into two blocks. 
- // We have to do the same for the current statement - move all side effects before the runtime - // lookup to prevBb - Statement* firstNewStmt; - GenTree** callUse; - gtSplitTree(block, stmt, call, &firstNewStmt, &callUse); - // Define a local for the result const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; @@ -141,17 +143,22 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() asgStmt->SetDebugInfo(debugInfo); gtSetStmtInfo(asgStmt); fgSetStmtSeq(asgStmt); + gtUpdateStmtSideEffects(asgStmt); return gtNewLclvNode(tmpNum, expr->TypeGet()); }; - // if sigTree was not a constant e.g. COMMA(..., CNS)) - spill it - if (!sigTree->IsCnsIntOrI()) + GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); + if (!ctxTree->OperIs(GT_LCL_VAR)) + { + ctxTree = spillExpr(ctxTree); + } + if (!sigNode->OperIs(GT_LCL_VAR, GT_CNS_INT)) { - spillExpr(sigTree); + sigNode = spillExpr(sigNode); } // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) - ctxTree = spillExpr(ctxTree); GenTree* slotPtrTree = gtClone(ctxTree); GenTree* indOffTree = nullptr; GenTree* lastIndOfTree = nullptr; @@ -221,8 +228,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; - GenTree* fastPathValueClone = - opts.OptimizationEnabled() ? 
fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); + GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); // Save dictionary slot to a local (to be used by fast path) GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); @@ -238,14 +244,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); fallbackBb->bbFlags |= BBF_INTERNAL; - GenTree* signatureArg = - gtNewIconEmbHndNode(signature, nullptr, GTF_ICON_GLOBAL_PTR, - (void*)sigTree->gtEffectiveVal()->AsIntCon()->gtCompileTimeHandle); - fgUpdateConstTreeValueNumber(signatureArg); - GenTreeCall* fallbackCall = - gtNewHelperCallNode(runtimeLookup.helper, TYP_I_IMPL, gtClone(ctxTree), signatureArg); - gtSetEvalOrder(fallbackCall); - fgMorphCall(fallbackCall); + GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); assert(!fallbackCall->IsExpRuntimeLookup()); assert(ctxTree->OperIs(GT_LCL_VAR)); Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); @@ -318,8 +317,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Replace call with rtLookupLclNum local call->BashToLclVar(this, rtLookupLclNum); gtUpdateTreeAncestorsSideEffects(call); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); // Connect all new blocks together fgRemoveRefPred(block, prevBb); @@ -420,6 +417,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() optLoopTable[prevBb->bbNatLoopNum].lpBottom = block; } } + gtUpdateStmtSideEffects(stmt); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); // All blocks are expected to be in the same EH region assert(BasicBlock::sameEHRegion(prevBb, block)); @@ -437,7 +437,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block = prevBb; goto TRAVERSE_BLOCK_AGAIN; } - prevStmt = stmt; } } From aad1188149d6641624b99ccdd105b9323deb1bb8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 4 Mar 2023 14:36:55 +0100 Subject: [PATCH 36/63] fix Release --- 
src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2994f5ac10ec94..21eeb9efe4bb7e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16332,7 +16332,7 @@ void Compiler::gtSplitTree( } else { - unsigned lclNum = m_compiler->lvaGrabTemp(true, "Spilling to split statement for tree"); + unsigned lclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling to split statement for tree")); GenTree* asg = m_compiler->gtNewTempAssign(lclNum, *use); stmt = m_compiler->fgNewStmtFromTree(asg, m_splitStmt->GetDebugInfo()); *use = m_compiler->gtNewLclvNode(lclNum, genActualType(*use)); From f3853a7f206bd7992b3395f792930f3cd09a3c9b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 4 Mar 2023 19:11:09 +0100 Subject: [PATCH 37/63] Fix assert --- src/coreclr/jit/gentree.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 21eeb9efe4bb7e..e4828a84bd300a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16330,6 +16330,20 @@ void Compiler::gtSplitTree( } *use = m_compiler->gtNewNothingNode(); } + else if ((*use)->OperIs(GT_FIELD_LIST)) + { + GenTreeFieldList* fieldList = (*use)->AsFieldList(); + ArrayStack fieldsStack = m_compiler->getAllocator(CMK_ArrayStack); + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + fieldsStack.Push(&use.NodeRef()); + } + while (!fieldsStack.Empty()) + { + SplitOutUse(UseInfo{fieldsStack.Pop(), fieldList}); + } + return; + } else { unsigned lclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Spilling to split statement for tree")); From f2d22fe51a25985393de46e0f81137d5785ede74 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 5 Mar 2023 01:59:23 +0100 Subject: [PATCH 38/63] Initial clean up --- src/coreclr/jit/gentree.cpp | 16 +- src/coreclr/jit/importer.cpp | 45 +++-- src/coreclr/jit/runtimelookup.cpp | 294 
++++++++++++++---------------- 3 files changed, 180 insertions(+), 175 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e4828a84bd300a..1d307e93105d9f 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16296,8 +16296,19 @@ void Compiler::gtSplitTree( assert((user == nullptr) || !user->OperIs(GT_ADDR)); + Statement* stmt = nullptr; if ((user != nullptr) && user->OperIs(GT_ASG) && (use == &user->AsOp()->gtOp1)) { + if ((*use)->OperIs(GT_COMMA)) + { + GenTree* sideEffects = nullptr; + m_compiler->gtExtractSideEffList(*use, &sideEffects); + if (sideEffects != nullptr) + { + stmt = m_compiler->fgNewStmtFromTree(sideEffects, m_splitStmt->GetDebugInfo()); + } + } + // ASGs are special -- the evaluation of the immediate first // operand happens as part of the assignment, but its children // are still evaluated 'as normal'. @@ -16306,7 +16317,8 @@ void Compiler::gtSplitTree( // here -- it is a unary node, so it cannot be a sibling to the // node we are splitting out. 
// - assert((*use)->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); + + assert((*use)->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_VAR, GT_LCL_FLD, GT_COMMA)); if ((*use)->OperIsUnary()) { user = *use; @@ -16318,7 +16330,7 @@ void Compiler::gtSplitTree( } } - Statement* stmt = nullptr; + stmt = nullptr; if (!(*use)->IsValue() || (*use)->OperIs(GT_ASG) || (user == nullptr) || (user->OperIs(GT_COMMA) && user->gtGetOp1() == *use)) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index b6626a553dac4b..4e7591edfc417a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1883,25 +1883,42 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken // Call the helper // - Setup argNode with the pointer to the signature returned by the lookup - assert(pRuntimeLookup->signature != nullptr); GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - argNode->gtFlags |= GTF_DONT_CSE; - - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + if (opts.OptimizationEnabled() || (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)) { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. + argNode->gtFlags |= GTF_DONT_CSE; + + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + impInlineRoot()->setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + } + return helperCall; } - return helperCall; + + // Extract the handle + GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; + + // Check for null and possibly call helper + GenTree* nullCheck = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* handleForResult = gtCloneExpr(handleForNullCheck); + + GenTreeColon* colonNullCheck = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, handleForResult, helperCall); + GenTree* result = gtNewQmarkNode(TYP_I_IMPL, nullCheck, colonNullCheck); + + unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling Runtime Lookup 
tree")); + + impAssignTempGen(tmp, result, CHECK_SPILL_NONE); + return gtNewLclvNode(tmp, TYP_I_IMPL); } struct RecursiveGuard diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 8835062f5df758..779fd519a65c15 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -6,6 +6,85 @@ #pragma hdrstop #endif +static void* GetConstantPointer(Compiler* comp, GenTree* tree) +{ + void* signature = nullptr; + if (tree->gtEffectiveVal()->IsCnsIntOrI()) + { + signature = (void*)tree->gtEffectiveVal()->AsIntCon()->IconValue(); + } + else if (comp->vnStore->IsVNConstant(tree->gtVNPair.GetLiberal())) + { + // signature is not a constant (CSE'd?) - let's see if we can access it via VN + signature = (void*)comp->vnStore->CoercedConstantValue(tree->gtVNPair.GetLiberal()); + } + return signature; +} + +// Save expression to a local and append as the last statement in prevBb +static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) +{ + if (expr->IsInvariant()) + { + return comp->gtCloneExpr(expr); + } + unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); + comp->lvaTable[tmpNum].lvType = expr->TypeGet(); + Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr)); + asgStmt->SetDebugInfo(debugInfo); + comp->gtSetStmtInfo(asgStmt); + comp->fgSetStmtSeq(asgStmt); + return comp->gtNewLclvNode(tmpNum, expr->TypeGet()); +}; + +static BasicBlock* CreateBlockFromTree( + Compiler* comp, BasicBlock* insertAfter, BBjumpKinds blockKind, GenTree* tree, DebugInfo& debugInfo) +{ + // Fast-path basic block + BasicBlock* newBlock = comp->fgNewBBafter(blockKind, insertAfter, true); + newBlock->bbFlags |= BBF_INTERNAL; + comp->gtSetEvalOrder(tree); + Statement* stmt = comp->fgNewStmtFromTree(tree); + comp->fgInsertStmtAtEnd(newBlock, stmt); + stmt->SetDebugInfo(debugInfo); + comp->gtSetStmtInfo(stmt); + comp->fgSetStmtSeq(stmt); 
+ comp->gtUpdateTreeAncestorsSideEffects(tree); + newBlock->bbCodeOffs = insertAfter->bbCodeOffsEnd; + newBlock->bbCodeOffsEnd = insertAfter->bbCodeOffsEnd; + return newBlock; +} + +static BasicBlock* SplitBlockBeforeTree( + Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* splitPoint, BasicBlock** prevBlock) +{ + Statement* firstNewStmt; + GenTree** callUse; + comp->gtSplitTree(block, stmt, splitPoint, &firstNewStmt, &callUse); + + BasicBlockFlags originalFlags = block->bbFlags; + BasicBlock* prevBb = block; + + if (stmt == block->firstStmt()) + { + block = comp->fgSplitBlockAtBeginning(prevBb); + } + else + { + assert(stmt->GetPrevStmt() != block->lastStmt()); + JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, stmt->GetPrevStmt()->GetID()) + block = comp->fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt()); + } + + // We split a block, possibly, in the middle - we need to propagate some flags + prevBb->bbFlags = originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + block->bbFlags |= + originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + + *prevBlock = prevBb; + return block; +} + //------------------------------------------------------------------------------ // fgExpandRuntimeLookups : partially expand runtime lookups helper calls // to add a nullcheck [+ size check] and a fast path @@ -39,7 +118,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { - TRAVERSE_BLOCK_AGAIN: + VISIT_BLOCK_AGAIN: for (Statement* const stmt : block->Statements()) { for (GenTree* const tree : stmt->TreeList()) @@ -50,46 +129,37 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } assert(tree->IsHelperCall()); - JITDUMP("Expanding runtime lookup for [%06d] in " FMT_BB ":\n", dspTreeID(tree), block->bbNum) DISPTREE(tree) JITDUMP("\n") 
GenTreeCall* call = tree->AsCall(); + + // Clear ExpRuntimeLookup flag so we won't miss any runtime lookup that needs partial expansion call->ClearExpRuntimeLookup(); - assert(call->gtArgs.CountArgs() == 2); if (call->IsTailCall()) { - // It is very unlikely to happen but just in case + // It is very unlikely to happen and is impossible to represent in C#, but just in case + // let's don't expand it: continue; } - // call(ctx, signature); - GenTree* sigTree = call->gtArgs.GetArgByIndex(1)->GetNode(); - - void* signature = nullptr; - if (sigTree->gtEffectiveVal()->IsCnsIntOrI()) - { - signature = (void*)sigTree->gtEffectiveVal()->AsIntCon()->IconValue(); - } - else + assert(call->gtArgs.CountArgs() == 2); + // The call has the following signature: + // + // type = call(genericCtx, signatureCns); + // + void* signature = GetConstantPointer(this, call->gtArgs.GetArgByIndex(1)->GetNode()); + if (signature == nullptr) { - // signature is not a constant (CSE'd?) - let's see if we can access it via VN - if (vnStore->IsVNConstant(sigTree->gtVNPair.GetLiberal())) - { - signature = (void*)vnStore->CoercedConstantValue(sigTree->gtVNPair.GetLiberal()); - } - else - { - // Technically, it is possible (e.g. it was CSE'd and then VN was erased), but for Debug mode we - // want to catch such cases as we really don't want to emit just a fallback call - it's too slow - assert(!"can't restore signature argument value"); - continue; - } + // Technically, it is possible (e.g. 
it was CSE'd and then VN was erased), but for Debug mode we + // want to catch such cases as we really don't want to emit just a fallback call - it's too slow + assert(!"can't restore signature argument value"); + continue; } - assert(signature != nullptr); + // Restore runtimeLookup using signature argument via a global dictionary CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); assert(lookupFound); @@ -105,58 +175,19 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.indirections != 0); assert(runtimeLookup.testForNull); - Statement* firstNewStmt = nullptr; - GenTree** callUse = nullptr; - gtSplitTree(block, stmt, call, &firstNewStmt, &callUse); - - BasicBlockFlags originalFlags = block->bbFlags; - BasicBlock* prevBb = block; - - if (stmt == block->firstStmt()) - { - block = fgSplitBlockAtBeginning(prevBb); - } - else - { - assert(stmt->GetPrevStmt() != block->lastStmt()); - JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, - stmt->GetPrevStmt()->GetID()) - block = fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt()); - } - - // We split a block, possibly, in the middle - we need to propagate some flags - prevBb->bbFlags = - originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); - block->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | - BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + // Split block right before the call tree + BasicBlock* prevBb = nullptr; + block = SplitBlockBeforeTree(this, block, stmt, call, &prevBb); + assert(prevBb != nullptr && block != nullptr); // Define a local for the result const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - // Save expression to a local and append as the last 
statement in prevBb - auto spillExpr = [&](GenTree* expr) -> GenTree* { - unsigned const tmpNum = lvaGrabTemp(false DEBUGARG("spilling expr")); - lvaTable[tmpNum].lvType = expr->TypeGet(); - Statement* asgStmt = fgNewStmtAtEnd(prevBb, gtNewTempAssign(tmpNum, expr)); - asgStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(asgStmt); - fgSetStmtSeq(asgStmt); - gtUpdateStmtSideEffects(asgStmt); - return gtNewLclvNode(tmpNum, expr->TypeGet()); - }; - - GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); - if (!ctxTree->OperIs(GT_LCL_VAR)) - { - ctxTree = spillExpr(ctxTree); - } - if (!sigNode->OperIs(GT_LCL_VAR, GT_CNS_INT)) - { - sigNode = spillExpr(sigNode); - } + // Save signature and generic context trees to locals if needed: + GenTree* ctxTree = SpillExpression(this, call->gtArgs.GetArgByIndex(0)->GetNode(), prevBb, debugInfo); + GenTree* sigNode = SpillExpression(this, call->gtArgs.GetArgByIndex(1)->GetNode(), prevBb, debugInfo); // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) GenTree* slotPtrTree = gtClone(ctxTree); @@ -166,13 +197,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) { - indOffTree = spillExpr(slotPtrTree); + indOffTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); slotPtrTree = gtClone(indOffTree); } // The last indirection could be subject to a size check (dynamic dictionary expansion) const bool isLastIndirectionWithSizeCheck = (i == runtimeLookup.indirections - 1) && needsSizeCheck; - if (i != 0) { slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); @@ -187,15 +217,13 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, indOffTree, slotPtrTree); } - if (runtimeLookup.offsets[i] != 0) { if (isLastIndirectionWithSizeCheck) { - lastIndOfTree = spillExpr(slotPtrTree); + 
lastIndOfTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); slotPtrTree = gtClone(lastIndOfTree); } - slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); } @@ -222,47 +250,25 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // // null-check basic block - BasicBlock* nullcheckBb = fgNewBBafter(BBJ_COND, prevBb, true); - nullcheckBb->bbFlags |= BBF_INTERNAL; - GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; - - GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); - // Save dictionary slot to a local (to be used by fast path) + GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; - gtSetEvalOrder(nullcheckOp); - Statement* nullcheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp)); - nullcheckStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(nullcheckStmt); - fgSetStmtSeq(nullcheckStmt); - fgInsertStmtAtEnd(nullcheckBb, nullcheckStmt); + BasicBlock* nullcheckBb = + CreateBlockFromTree(this, prevBb, BBJ_COND, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), + debugInfo); // Fallback basic block - BasicBlock* fallbackBb = fgNewBBafter(BBJ_NONE, nullcheckBb, true); - fallbackBb->bbFlags |= BBF_INTERNAL; - - GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); - assert(!fallbackCall->IsExpRuntimeLookup()); - assert(ctxTree->OperIs(GT_LCL_VAR)); - Statement* asgFallbackStmt = fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall)); - asgFallbackStmt->SetDebugInfo(debugInfo); - fgInsertStmtAtBeg(fallbackBb, asgFallbackStmt); - gtSetStmtInfo(asgFallbackStmt); - fgSetStmtSeq(asgFallbackStmt); - gtUpdateTreeAncestorsSideEffects(fallbackCall); + GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); + GenTree* 
asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall); + BasicBlock* fallbackBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo); // Fast-path basic block - BasicBlock* fastPathBb = fgNewBBafter(BBJ_ALWAYS, nullcheckBb, true); - fastPathBb->bbFlags |= BBF_INTERNAL; - Statement* asgFastPathValueStmt = - fgNewStmtFromTree(gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone)); - asgFastPathValueStmt->SetDebugInfo(debugInfo); - fgInsertStmtAtBeg(fastPathBb, asgFastPathValueStmt); - gtSetStmtInfo(asgFastPathValueStmt); - fgSetStmtSeq(asgFastPathValueStmt); + GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); + BasicBlock* fastPathBb = + CreateBlockFromTree(this, nullcheckBb, BBJ_ALWAYS, asgFastpathValue, debugInfo); BasicBlock* sizeCheckBb = nullptr; if (needsSizeCheck) @@ -291,9 +297,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // use(rtLookupLcl); // - sizeCheckBb = fgNewBBbefore(BBJ_COND, nullcheckBb, true); - sizeCheckBb->bbFlags |= BBF_INTERNAL; - // sizeValue = dictionary[pRuntimeLookup->sizeOffset] GenTreeIntCon* sizeOffset = gtNewIconNode(runtimeLookup.sizeOffset, TYP_I_IMPL); assert(lastIndOfTree != nullptr); @@ -306,19 +309,22 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() gtNewIconNode(runtimeLookup.offsets[runtimeLookup.indirections - 1], TYP_I_IMPL); GenTree* sizeCheck = gtNewOperNode(GT_LE, TYP_INT, sizeValue, offsetValue); sizeCheck->gtFlags |= GTF_RELOP_JMP_USED; - gtSetEvalOrder(sizeCheck); - Statement* sizeCheckStmt = fgNewStmtFromTree(gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck)); - sizeCheckStmt->SetDebugInfo(debugInfo); - gtSetStmtInfo(sizeCheckStmt); - fgSetStmtSeq(sizeCheckStmt); - fgInsertStmtAtEnd(sizeCheckBb, sizeCheckStmt); + + GenTree* jtrue = gtNewOperNode(GT_JTRUE, TYP_VOID, sizeCheck); + sizeCheckBb = CreateBlockFromTree(this, prevBb, BBJ_COND, jtrue, debugInfo); } - // Replace call with rtLookupLclNum local + // Replace call with 
rtLookupLclNum local and update side effects call->BashToLclVar(this, rtLookupLclNum); + gtSetEvalOrder(call); gtUpdateTreeAncestorsSideEffects(call); + gtUpdateStmtSideEffects(stmt); + gtSetStmtInfo(stmt); + fgSetStmtSeq(stmt); - // Connect all new blocks together + // + // Update preds in all new blocks + // fgRemoveRefPred(block, prevBb); fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); @@ -327,19 +333,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (needsSizeCheck) { - // Size check is the first block after prevBb + // sizeCheckBb is the first block after prevBb fgAddRefPred(sizeCheckBb, prevBb); - // sizeCheckBb flows into nullcheckBb in case if the size check passes fgAddRefPred(nullcheckBb, sizeCheckBb); - // fallbackBb is reachable from both nullcheckBb and sizeCheckBb fgAddRefPred(fallbackBb, nullcheckBb); fgAddRefPred(fallbackBb, sizeCheckBb); - // fastPathBb is only reachable from successful nullcheckBb fgAddRefPred(fastPathBb, nullcheckBb); - // sizeCheckBb fails - jump to fallbackBb sizeCheckBb->bbJumpDest = fallbackBb; } @@ -347,61 +349,40 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { // nullcheckBb is the first block after prevBb fgAddRefPred(nullcheckBb, prevBb); - // No size check, nullcheckBb jumps to fast path fgAddRefPred(fastPathBb, nullcheckBb); - // fallbackBb is only reachable from nullcheckBb (jump destination) fgAddRefPred(fallbackBb, nullcheckBb); } - // Some quick validation - assert(prevBb->NumSucc() == 1); - if (needsSizeCheck) - { - assert(prevBb->GetSucc(0) == sizeCheckBb); - assert(sizeCheckBb->NumSucc() == 2); - } - else - { - assert(prevBb->GetSucc(0) == nullcheckBb); - } - assert(nullcheckBb->NumSucc() == 2); - assert(fastPathBb->NumSucc() == 1); - assert(fallbackBb->NumSucc() == 1); - assert(fastPathBb->GetSucc(0) == block); - assert(fallbackBb->GetSucc(0) == block); - + // // Re-distribute weights (see '[weight: X]' on the diagrams above) - + // TODO: consider marking fallbackBb as rarely-taken + 
// block->inheritWeight(prevBb); - if (needsSizeCheck) { sizeCheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); - // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 100-64=36% chance we fail either nullcheck or sizecheck fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); } else { nullcheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) fallbackBb->inheritWeightPercentage(nullcheckBb, 20); } - // Update loop info - if (prevBb->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) + // + // Update loop info if loop table is known to be valid + // + if (optLoopTableValid && prevBb->bbNatLoopNum != BasicBlock::NOT_IN_LOOP) { nullcheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; fastPathBb->bbNatLoopNum = prevBb->bbNatLoopNum; @@ -410,16 +391,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { sizeCheckBb->bbNatLoopNum = prevBb->bbNatLoopNum; } - // Update lpBottom after block split if (optLoopTable[prevBb->bbNatLoopNum].lpBottom == prevBb) { optLoopTable[prevBb->bbNatLoopNum].lpBottom = block; } } - gtUpdateStmtSideEffects(stmt); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); // All blocks are expected to be in the same EH region assert(BasicBlock::sameEHRegion(prevBb, block)); @@ -435,7 +412,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // for that result = PhaseStatus::MODIFIED_EVERYTHING; block = prevBb; - goto TRAVERSE_BLOCK_AGAIN; + goto VISIT_BLOCK_AGAIN; } } } @@ -456,6 +433,5 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } #endif } - return result; } From 3e3b15f9a4eecd825eeadae9619c08b58d8f494a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 14:11:57 +0100 Subject: [PATCH 39/63] Mark ArrayStack ctor explicit --- src/coreclr/jit/arraystack.h | 2 +- 
src/coreclr/jit/gentree.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/arraystack.h b/src/coreclr/jit/arraystack.h index eb8a17932ca63d..83a43c9432ba0e 100644 --- a/src/coreclr/jit/arraystack.h +++ b/src/coreclr/jit/arraystack.h @@ -10,7 +10,7 @@ class ArrayStack static const int builtinSize = 8; public: - ArrayStack(CompAllocator alloc, int initialCapacity = builtinSize) : m_alloc(alloc) + explicit ArrayStack(CompAllocator alloc, int initialCapacity = builtinSize) : m_alloc(alloc) { if (initialCapacity > builtinSize) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1d307e93105d9f..2926eda960bb53 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16345,7 +16345,7 @@ void Compiler::gtSplitTree( else if ((*use)->OperIs(GT_FIELD_LIST)) { GenTreeFieldList* fieldList = (*use)->AsFieldList(); - ArrayStack fieldsStack = m_compiler->getAllocator(CMK_ArrayStack); + ArrayStack fieldsStack(m_compiler->getAllocator(CMK_ArrayStack)); for (GenTreeFieldList::Use& use : fieldList->Uses()) { fieldsStack.Push(&use.NodeRef()); From 0f4025916b702b1fa3a750b7f36704539d38b16d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 14:14:39 +0100 Subject: [PATCH 40/63] Add docs to gtSplitTree --- src/coreclr/jit/gentree.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2926eda960bb53..53a30984fbbf26 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16212,6 +16212,24 @@ bool Compiler::gtTreeHasSideEffects(GenTree* tree, GenTreeFlags flags /* = GTF_S return true; } +//------------------------------------------------------------------------ +// gtSplitTree: Split a statement into multiple statements such that a +// specified tree is the first executed non-invariant node in the statement. +// +// Arguments: +// block - The block containing the statement. 
+// stmt - The statement containing the tree. +// splitPoint - A tree inside the statement. +// firstNewStmt - [out] The first new statement that was introduced. +// [firstNewStmt..stmt) are the statements added by this function. +// splitNodeUse - The use of the tree to split at. +// +// Notes: +// This method turns all non-invariant nodes that would be executed before +// the split point into new separate statements. If those nodes were values +// this involves introducing new locals for those values, such that they can +// be used in the original statement. +// void Compiler::gtSplitTree( BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) { From 2a29ac3663180060b2768f15a6d2fc4ab2605454 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 14:15:02 +0100 Subject: [PATCH 41/63] Fix up ASG(COMMA(...), ...) handling --- src/coreclr/jit/gentree.cpp | 38 ++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 53a30984fbbf26..9933ef0d42f841 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16268,6 +16268,7 @@ void Compiler::gtSplitTree( fgWalkResult PreOrderVisit(GenTree** use, GenTree* user) { + assert(!(*use)->OperIs(GT_QMARK)); m_useStack.Push(UseInfo{use, user}); return WALK_CONTINUE; } @@ -16307,7 +16308,9 @@ void Compiler::gtSplitTree( GenTree** use = useInf.Use; GenTree* user = useInf.User; - if ((*use)->IsInvariant()) + GenTree* node = *use; + + if (node->IsInvariant()) { return; } @@ -16317,14 +16320,35 @@ void Compiler::gtSplitTree( Statement* stmt = nullptr; if ((user != nullptr) && user->OperIs(GT_ASG) && (use == &user->AsOp()->gtOp1)) { - if ((*use)->OperIs(GT_COMMA)) + if (node->OperIs(GT_COMMA)) { - GenTree* sideEffects = nullptr; - m_compiler->gtExtractSideEffList(*use, &sideEffects); - if (sideEffects != nullptr) + // We have: + // ASG + // 
COMMA + // op1 + // op2 + // rhs + // And we want to split out the comma. + // + // For the first use we will update the ASG to be ASG(op2, rhs) + // so that we get the proper location treatment. The edge will + // then be the ASG --- op2 edge. + *use = node->gtGetOp2(); + UseInfo useA { use, user }; + + // The second use will be the COMMA --- op1 edge, which we + // expect to be handled by simple side effect extraction in + // the recursive call. + UseInfo useB { &node->AsOp()->gtOp1, node }; + + if (node->IsReverseOp()) { - stmt = m_compiler->fgNewStmtFromTree(sideEffects, m_splitStmt->GetDebugInfo()); + std::swap(useA, useB); } + + SplitOutUse(useA); + SplitOutUse(useB); + return; } // ASGs are special -- the evaluation of the immediate first @@ -16336,7 +16360,7 @@ void Compiler::gtSplitTree( // node we are splitting out. // - assert((*use)->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_VAR, GT_LCL_FLD, GT_COMMA)); + assert((*use)->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_VAR, GT_LCL_FLD)); if ((*use)->OperIsUnary()) { user = *use; From 598ff714ba444a9409e3a7deea3b6cc84d0a3c12 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 15:12:02 +0100 Subject: [PATCH 42/63] Split out uses in execution order instead --- src/coreclr/jit/gentree.cpp | 63 ++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9933ef0d42f841..c58494519e5d1c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16277,16 +16277,31 @@ void Compiler::gtSplitTree( { if (*use == m_splitNode) { - GenTree* ancestor = *use; - while (!m_useStack.Empty()) + GenTree* ancestor = nullptr; + + // Split all siblings and ancestor siblings. 
+ int i; + for (i = 0; i < m_useStack.Height() - 1; i++) { - while (m_useStack.Top(0).User == ancestor) + const UseInfo& useInf = m_useStack.BottomRef(i); + if (useInf.Use == use) + { + break; + } + + if (m_useStack.BottomRef(i + 1).User == useInf.User) { - SplitOutUse(m_useStack.Pop()); + SplitOutUse(useInf); } + } + + assert(m_useStack.Bottom(i).Use == use); - assert(*m_useStack.Top(0).Use == ancestor); - ancestor = m_useStack.Pop().User; + // Split operands. + for (i++; i < m_useStack.Height(); i++) + { + assert(m_useStack.BottomRef(i).User == *use); + SplitOutUse(m_useStack.BottomRef(i)); } SplitNodeUse = use; @@ -16330,24 +16345,27 @@ void Compiler::gtSplitTree( // rhs // And we want to split out the comma. // - // For the first use we will update the ASG to be ASG(op2, rhs) + // The first use will be the COMMA --- op1 edge, which we + // expect to be handled by simple side effect extraction in + // the recursive call. + UseInfo use1 { &node->AsOp()->gtOp1, node }; + + // For the second use we will update the ASG to be ASG(op2, rhs) // so that we get the proper location treatment. The edge will // then be the ASG --- op2 edge. *use = node->gtGetOp2(); - UseInfo useA { use, user }; - - // The second use will be the COMMA --- op1 edge, which we - // expect to be handled by simple side effect extraction in - // the recursive call. 
- UseInfo useB { &node->AsOp()->gtOp1, node }; + UseInfo use2 { use, user }; if (node->IsReverseOp()) { - std::swap(useA, useB); + SplitOutUse(use2); + SplitOutUse(use1); + } + else + { + SplitOutUse(use1); + SplitOutUse(use2); } - - SplitOutUse(useA); - SplitOutUse(useB); return; } @@ -16390,11 +16408,7 @@ void Compiler::gtSplitTree( ArrayStack fieldsStack(m_compiler->getAllocator(CMK_ArrayStack)); for (GenTreeFieldList::Use& use : fieldList->Uses()) { - fieldsStack.Push(&use.NodeRef()); - } - while (!fieldsStack.Empty()) - { - SplitOutUse(UseInfo{fieldsStack.Pop(), fieldList}); + SplitOutUse(UseInfo{&use.NodeRef(), fieldList}); } return; } @@ -16410,13 +16424,12 @@ void Compiler::gtSplitTree( { if (FirstStatement == nullptr) { - FirstStatement = m_splitStmt; + FirstStatement = stmt; } m_compiler->gtUpdateStmtSideEffects(stmt); m_compiler->gtSetStmtInfo(stmt); m_compiler->fgSetStmtSeq(stmt); - m_compiler->fgInsertStmtBefore(m_bb, FirstStatement, stmt); - FirstStatement = stmt; + m_compiler->fgInsertStmtBefore(m_bb, m_splitStmt, stmt); } } }; From 733fcd90db938224d23c4da8fcd3b3953d750437 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 15:19:09 +0100 Subject: [PATCH 43/63] Remove dead local --- src/coreclr/jit/gentree.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c58494519e5d1c..e39c186cf17d54 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16277,8 +16277,6 @@ void Compiler::gtSplitTree( { if (*use == m_splitNode) { - GenTree* ancestor = nullptr; - // Split all siblings and ancestor siblings. 
int i; for (i = 0; i < m_useStack.Height() - 1; i++) From a62a135f2f9f62cf61be2f2a97d93be357117951 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 5 Mar 2023 15:24:08 +0100 Subject: [PATCH 44/63] Clean up --- src/coreclr/jit/gentree.cpp | 35 ++++++++++++------------- src/coreclr/jit/importer.cpp | 43 +++++++++---------------------- src/coreclr/jit/runtimelookup.cpp | 35 ++++++++++++++----------- 3 files changed, 49 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e39c186cf17d54..796729df5537a4 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16328,6 +16328,20 @@ void Compiler::gtSplitTree( return; } + // Don't spill some locals we know they never change + if (node->OperIs(GT_LCL_VAR)) + { + unsigned lclNum = node->AsLclVar()->GetLclNum(); + LclVarDsc* lclDsc = m_compiler->lvaGetDesc(lclNum); + + // Generic context + if (lclDsc->lvIsParam && !lclDsc->IsAddressExposed() && + (m_compiler->compMap2ILvarNum(lclNum) == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)) + { + return; + } + } + assert((user == nullptr) || !user->OperIs(GT_ADDR)); Statement* stmt = nullptr; @@ -16346,13 +16360,13 @@ void Compiler::gtSplitTree( // The first use will be the COMMA --- op1 edge, which we // expect to be handled by simple side effect extraction in // the recursive call. - UseInfo use1 { &node->AsOp()->gtOp1, node }; + UseInfo use1{&node->AsOp()->gtOp1, node}; // For the second use we will update the ASG to be ASG(op2, rhs) // so that we get the proper location treatment. The edge will // then be the ASG --- op2 edge. 
*use = node->gtGetOp2(); - UseInfo use2 { use, user }; + UseInfo use2{use, user}; if (node->IsReverseOp()) { @@ -16402,7 +16416,7 @@ void Compiler::gtSplitTree( } else if ((*use)->OperIs(GT_FIELD_LIST)) { - GenTreeFieldList* fieldList = (*use)->AsFieldList(); + GenTreeFieldList* fieldList = (*use)->AsFieldList(); ArrayStack fieldsStack(m_compiler->getAllocator(CMK_ArrayStack)); for (GenTreeFieldList::Use& use : fieldList->Uses()) { @@ -16432,21 +16446,6 @@ void Compiler::gtSplitTree( } }; - GenTree* rootNode = stmt->GetRootNode(); - if (rootNode == splitPoint) - { - *firstNewStmt = nullptr; - *splitNodeUse = stmt->GetRootNodePointer(); - return; - } - - if (rootNode->OperIs(GT_ASG) && rootNode->gtGetOp1()->OperIs(GT_LCL_VAR) && rootNode->gtGetOp2() == splitPoint) - { - *firstNewStmt = nullptr; - *splitNodeUse = &rootNode->AsOp()->gtOp2; - return; - } - Splitter splitter(this, block, stmt, splitPoint); splitter.WalkTree(stmt->GetRootNodePointer(), nullptr); *firstNewStmt = splitter.FirstStatement; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4e7591edfc417a..876a00064da26e 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1886,39 +1886,20 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - if (opts.OptimizationEnabled() || (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)) - { - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - argNode->gtFlags |= GTF_DONT_CSE; + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. 
+ // argNode->gtFlags |= GTF_DONT_CSE; - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. - // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) - { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); - } - return helperCall; + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + impInlineRoot()->setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); } - - // Extract the handle - GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; - - // Check for null and possibly call helper - GenTree* nullCheck = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); - GenTree* handleForResult = gtCloneExpr(handleForNullCheck); - - GenTreeColon* colonNullCheck = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, handleForResult, helperCall); - GenTree* result = gtNewQmarkNode(TYP_I_IMPL, nullCheck, colonNullCheck); - - unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling Runtime Lookup tree")); - - impAssignTempGen(tmp, result, CHECK_SPILL_NONE); - return gtNewLclvNode(tmp, TYP_I_IMPL); + return helperCall; } struct RecursiveGuard 
diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 779fd519a65c15..9c119071a27b13 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -24,10 +24,6 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) // Save expression to a local and append as the last statement in prevBb static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) { - if (expr->IsInvariant()) - { - return comp->gtCloneExpr(expr); - } unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); comp->lvaTable[tmpNum].lvType = expr->TypeGet(); Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr)); @@ -49,7 +45,7 @@ static BasicBlock* CreateBlockFromTree( stmt->SetDebugInfo(debugInfo); comp->gtSetStmtInfo(stmt); comp->fgSetStmtSeq(stmt); - comp->gtUpdateTreeAncestorsSideEffects(tree); + comp->gtUpdateStmtSideEffects(stmt); newBlock->bbCodeOffs = insertAfter->bbCodeOffsEnd; newBlock->bbCodeOffsEnd = insertAfter->bbCodeOffsEnd; return newBlock; } @@ -97,7 +93,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (!doesMethodHaveExpRuntimeLookup()) { - #ifdef DEBUG // To make sure doesMethodHaveExpRuntimeLookup() is not lying to us: for (BasicBlock* block : Blocks()) @@ -115,6 +110,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() return result; } + INDEBUG(bool irIsPrinted = false); + // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { @@ -140,8 +137,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (call->IsTailCall()) { - // It is very unlikely to happen and is impossible to represent in C#, but just in case - // let's don't expand it: + // It is very unlikely to happen and is impossible to represent in C# continue; } @@ -159,6 +155,17 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } +#ifdef DEBUG + // Print full IR before any changes we're going to make + 
if (!irIsPrinted && verbose) + { + irIsPrinted = true; + printf("\n*************** Before fgExpandRuntimeLookups()\n"); + fgDispBasicBlocks(true); + printf("\n"); + } +#endif + // Restore runtimeLookup using signature argument via a global dictionary CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); @@ -185,12 +192,11 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - // Save signature and generic context trees to locals if needed: - GenTree* ctxTree = SpillExpression(this, call->gtArgs.GetArgByIndex(0)->GetNode(), prevBb, debugInfo); - GenTree* sigNode = SpillExpression(this, call->gtArgs.GetArgByIndex(1)->GetNode(), prevBb, debugInfo); + GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); + GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) - GenTree* slotPtrTree = gtClone(ctxTree); + GenTree* slotPtrTree = gtCloneExpr(ctxTree); GenTree* indOffTree = nullptr; GenTree* lastIndOfTree = nullptr; for (WORD i = 0; i < runtimeLookup.indirections; i++) @@ -198,7 +204,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if ((i == 1 && runtimeLookup.indirectFirstOffset) || (i == 2 && runtimeLookup.indirectSecondOffset)) { indOffTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); - slotPtrTree = gtClone(indOffTree); + slotPtrTree = gtCloneExpr(indOffTree); } // The last indirection could be subject to a size check (dynamic dictionary expansion) @@ -222,7 +228,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() if (isLastIndirectionWithSizeCheck) { lastIndOfTree = SpillExpression(this, slotPtrTree, prevBb, debugInfo); - slotPtrTree = gtClone(lastIndOfTree); + slotPtrTree = gtCloneExpr(lastIndOfTree); } slotPtrTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, 
gtNewIconNode(runtimeLookup.offsets[i], TYP_I_IMPL)); @@ -317,7 +323,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Replace call with rtLookupLclNum local and update side effects call->BashToLclVar(this, rtLookupLclNum); gtSetEvalOrder(call); - gtUpdateTreeAncestorsSideEffects(call); gtUpdateStmtSideEffects(stmt); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); From 730ed350f1a19ba6c75eaa2973769b43d9b52f86 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 5 Mar 2023 15:33:55 +0100 Subject: [PATCH 45/63] Avoid creating statements for non address-exposed locals --- src/coreclr/jit/gentree.cpp | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 796729df5537a4..8ff11aacae2327 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -16287,6 +16287,9 @@ void Compiler::gtSplitTree( break; } + // If this has the same user as the next node then it is a + // sibling of an ancestor -- and thus not on the "path" + // that contains the split node. if (m_useStack.BottomRef(i + 1).User == useInf.User) { SplitOutUse(useInf); @@ -16295,7 +16298,7 @@ void Compiler::gtSplitTree( assert(m_useStack.Bottom(i).Use == use); - // Split operands. + // The remaining nodes should be operands of the split node. 
for (i++; i < m_useStack.Height(); i++) { assert(m_useStack.BottomRef(i).User == *use); @@ -16328,18 +16331,25 @@ void Compiler::gtSplitTree( return; } - // Don't spill some locals we know they never change - if (node->OperIs(GT_LCL_VAR)) + if (node->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc(node->AsLclVarCommon())->IsAddressExposed()) { - unsigned lclNum = node->AsLclVar()->GetLclNum(); - LclVarDsc* lclDsc = m_compiler->lvaGetDesc(lclNum); - - // Generic context - if (lclDsc->lvIsParam && !lclDsc->IsAddressExposed() && - (m_compiler->compMap2ILvarNum(lclNum) == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)) - { - return; - } + // The splitting we do here should always guarantee that we + // only introduce locals for the tree edges that overlap the + // split point, so it should be ok to avoid creating statements + // for locals that aren't address exposed. Note that this + // relies on it being illegal IR to have a tree edge for a + // register candidate that overlaps with an interfering node. + // + // For example, this optimization would be problematic if it + // could occur: + // + // CALL + // LCL_VAR V00 + // CALL + // ASG(V00, ...) 
(setup) + // LCL_VAR V00 + // + return; } assert((user == nullptr) || !user->OperIs(GT_ADDR)); From 9c6c89fb25dc69394cd8d645788fd46bae7b8a1d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 5 Mar 2023 15:43:48 +0100 Subject: [PATCH 46/63] More cleanup (use callUse) --- src/coreclr/jit/runtimelookup.cpp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 9c119071a27b13..66b37e62757b45 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -8,17 +8,16 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) { - void* signature = nullptr; + void* cns = nullptr; if (tree->gtEffectiveVal()->IsCnsIntOrI()) { - signature = (void*)tree->gtEffectiveVal()->AsIntCon()->IconValue(); + cns = (void*)tree->gtEffectiveVal()->AsIntCon()->IconValue(); } else if (comp->vnStore->IsVNConstant(tree->gtVNPair.GetLiberal())) { - // signature is not a constant (CSE'd?) 
- let's see if we can access it via VN - signature = (void*)comp->vnStore->CoercedConstantValue(tree->gtVNPair.GetLiberal()); + cns = (void*)comp->vnStore->CoercedConstantValue(tree->gtVNPair.GetLiberal()); } - return signature; + return cns; } // Save expression to a local and append as the last statement in prevBb @@ -52,11 +51,10 @@ static BasicBlock* CreateBlockFromTree( } static BasicBlock* SplitBlockBeforeTree( - Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* splitPoint, BasicBlock** prevBlock) + Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* splitPoint, BasicBlock** prevBlock, GenTree*** callUse) { Statement* firstNewStmt; - GenTree** callUse; - comp->gtSplitTree(block, stmt, splitPoint, &firstNewStmt, &callUse); + comp->gtSplitTree(block, stmt, splitPoint, &firstNewStmt, callUse); BasicBlockFlags originalFlags = block->bbFlags; BasicBlock* prevBb = block; @@ -183,8 +181,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.testForNull); // Split block right before the call tree - BasicBlock* prevBb = nullptr; - block = SplitBlockBeforeTree(this, block, stmt, call, &prevBb); + BasicBlock* prevBb = nullptr; + GenTree** callUse = nullptr; + block = SplitBlockBeforeTree(this, block, stmt, call, &prevBb, &callUse); assert(prevBb != nullptr && block != nullptr); // Define a local for the result @@ -267,9 +266,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() debugInfo); // Fallback basic block - GenTreeCall* fallbackCall = gtCloneExpr(call)->AsCall(); - GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), fallbackCall); - BasicBlock* fallbackBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo); + GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); + BasicBlock* fallbackBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo); // Fast-path basic block GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), 
fastPathValueClone); @@ -321,7 +319,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } // Replace call with rtLookupLclNum local and update side effects - call->BashToLclVar(this, rtLookupLclNum); + *callUse = gtClone(rtLookupLcl); gtSetEvalOrder(call); gtUpdateStmtSideEffects(stmt); gtSetStmtInfo(stmt); From f2c1508677bda1833b211897ab433e0eca8db5b9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 5 Mar 2023 16:36:35 +0100 Subject: [PATCH 47/63] use old path for tier0 (slightly better CQ/TP for tier0) --- src/coreclr/jit/importer.cpp | 43 ++++++++++++++++++++++--------- src/coreclr/jit/runtimelookup.cpp | 9 ++++--- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 876a00064da26e..4e7591edfc417a 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1886,20 +1886,39 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - // argNode->gtFlags |= GTF_DONT_CSE; - - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + if (opts.OptimizationEnabled() || (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)) { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. + argNode->gtFlags |= GTF_DONT_CSE; + + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + impInlineRoot()->setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + } + return helperCall; } - return helperCall; + + // Extract the handle + GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; + + // Check for null and possibly call helper + GenTree* nullCheck = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* handleForResult = gtCloneExpr(handleForNullCheck); + + GenTreeColon* colonNullCheck = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, handleForResult, helperCall); + GenTree* result = gtNewQmarkNode(TYP_I_IMPL, nullCheck, colonNullCheck); + + unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling Runtime Lookup 
tree")); + + impAssignTempGen(tmp, result, CHECK_SPILL_NONE); + return gtNewLclvNode(tmp, TYP_I_IMPL); } struct RecursiveGuard diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 66b37e62757b45..dad45623d9f716 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -113,9 +113,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { - VISIT_BLOCK_AGAIN: for (Statement* const stmt : block->Statements()) { + if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0) + { + // TP: Stmt has no calls - bail out + continue; + } + for (GenTree* const tree : stmt->TreeList()) { // We only need calls with IsExpRuntimeLookup() flag @@ -414,8 +419,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible // for that result = PhaseStatus::MODIFIED_EVERYTHING; - block = prevBb; - goto VISIT_BLOCK_AGAIN; } } } From c1cc98044713ad12b54389f535f2f667414c0abf Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 5 Mar 2023 17:01:30 +0100 Subject: [PATCH 48/63] Code clean up --- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/fgbasic.cpp | 42 ++++++++++++++++++++++++++++++ src/coreclr/jit/importer.cpp | 2 ++ src/coreclr/jit/runtimelookup.cpp | 43 ++++++++----------------------- 4 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c5fa280df28dfc..57c5a819ba66e5 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4767,6 +4767,7 @@ class Compiler BasicBlock* fgSplitBlockAfterStatement(BasicBlock* curr, Statement* stmt); BasicBlock* fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node); // for LIR BasicBlock* fgSplitEdge(BasicBlock* curr, BasicBlock* succ); + BasicBlock* fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* tree, 
GenTree*** treeUse); Statement* fgNewStmtFromTree(GenTree* tree, BasicBlock* block, const DebugInfo& di); Statement* fgNewStmtFromTree(GenTree* tree); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 581a5aa620bef8..56866aa2626f11 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4616,6 +4616,48 @@ BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* curr, Statement* st return newBlock; } +//------------------------------------------------------------------------------ +// fgSplitBlockBeforeTree : Split the given block right before the given tree +// +// Arguments: +// block - block that contains the tree (split point) +// stmt - statement that contains the tree +// tree - the tree we're going to use as a split point for given block +// treeUse - pointer to the tree, useful when it's needed to replace it +// +// Returns: +// The last block after split +// +// Notes: +// See comments in gtSplitTree +// +BasicBlock* Compiler::fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* tree, GenTree*** treeUse) +{ + Statement* firstNewStmt; + gtSplitTree(block, stmt, tree, &firstNewStmt, treeUse); + + BasicBlockFlags originalFlags = block->bbFlags; + BasicBlock* prevBb = block; + + if (stmt == block->firstStmt()) + { + block = fgSplitBlockAtBeginning(prevBb); + } + else + { + assert(stmt->GetPrevStmt() != block->lastStmt()); + JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, stmt->GetPrevStmt()->GetID()); + block = fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt()); + } + + // We split a block, possibly, in the middle - we need to propagate some flags + prevBb->bbFlags = originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + block->bbFlags |= + originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); + + return block; +} + 
//------------------------------------------------------------------------------ // fgSplitBlockAfterNode - Split the given block, with all code after // the given node going into the second block. diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4e7591edfc417a..44950b28c6b034 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1886,6 +1886,8 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); + // Partially inline it later in fgExpandRuntimeLookups. Although, keep the QMARK path below for Tier0 + // as it demonstrates better CQ and TP for Tier0. if (opts.OptimizationEnabled() || (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)) { // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index dad45623d9f716..76feafd17de7eb 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -6,6 +6,7 @@ #pragma hdrstop #endif +// Obtain constant pointer from a tree static void* GetConstantPointer(Compiler* comp, GenTree* tree) { void* cns = nullptr; @@ -20,7 +21,7 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) return cns; } -// Save expression to a local and append as the last statement in prevBb +// Save expression to a local and append it as the last statement in exprBlock static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) { unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); @@ -32,6 +33,7 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB return 
comp->gtNewLclvNode(tmpNum, expr->TypeGet()); }; +// Create block from the given tree static BasicBlock* CreateBlockFromTree( Compiler* comp, BasicBlock* insertAfter, BBjumpKinds blockKind, GenTree* tree, DebugInfo& debugInfo) { @@ -50,41 +52,18 @@ static BasicBlock* CreateBlockFromTree( return newBlock; } -static BasicBlock* SplitBlockBeforeTree( - Compiler* comp, BasicBlock* block, Statement* stmt, GenTree* splitPoint, BasicBlock** prevBlock, GenTree*** callUse) -{ - Statement* firstNewStmt; - comp->gtSplitTree(block, stmt, splitPoint, &firstNewStmt, callUse); - - BasicBlockFlags originalFlags = block->bbFlags; - BasicBlock* prevBb = block; - - if (stmt == block->firstStmt()) - { - block = comp->fgSplitBlockAtBeginning(prevBb); - } - else - { - assert(stmt->GetPrevStmt() != block->lastStmt()); - JITDUMP("Splitting " FMT_BB " after statement " FMT_STMT "\n", prevBb->bbNum, stmt->GetPrevStmt()->GetID()) - block = comp->fgSplitBlockAfterStatement(prevBb, stmt->GetPrevStmt()); - } - - // We split a block, possibly, in the middle - we need to propagate some flags - prevBb->bbFlags = originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); - block->bbFlags |= - originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL); - - *prevBlock = prevBb; - return block; -} - //------------------------------------------------------------------------------ // fgExpandRuntimeLookups : partially expand runtime lookups helper calls // to add a nullcheck [+ size check] and a fast path // Returns: // PhaseStatus indicating what, if anything, was changed. // +// Notes: +// The runtime lookup itself is needed to access a handle in code shared between +// generic instantiations. The lookup depends on the typeContext which is only available at +// runtime, and not at compile-time. See ASCII block diagrams in comments below for +// a better understanding of how this phase expands runtime lookups. 
+// PhaseStatus Compiler::fgExpandRuntimeLookups() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; @@ -186,9 +165,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.testForNull); // Split block right before the call tree - BasicBlock* prevBb = nullptr; + BasicBlock* prevBb = block; GenTree** callUse = nullptr; - block = SplitBlockBeforeTree(this, block, stmt, call, &prevBb, &callUse); + block = fgSplitBlockBeforeTree(block, stmt, call, &callUse); assert(prevBb != nullptr && block != nullptr); // Define a local for the result From 2834318a742c25ef9d25c84ae35842312a95de12 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 11 Mar 2023 23:27:20 +0100 Subject: [PATCH 49/63] resolve conflicts --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgbasic.cpp | 16 +++++++++------- src/coreclr/jit/runtimelookup.cpp | 14 ++++++++++---- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5d2124ead10390..7ab54332866fb3 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4814,7 +4814,7 @@ class Compiler BasicBlock* fgSplitBlockAfterStatement(BasicBlock* curr, Statement* stmt); BasicBlock* fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node); // for LIR BasicBlock* fgSplitEdge(BasicBlock* curr, BasicBlock* succ); - BasicBlock* fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* tree, GenTree*** treeUse); + BasicBlock* fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse); Statement* fgNewStmtFromTree(GenTree* tree, BasicBlock* block, const DebugInfo& di); Statement* fgNewStmtFromTree(GenTree* tree); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 2e48569c053374..0824a63f576e0f 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -4622,10 +4622,12 @@ BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* 
curr, Statement* st // fgSplitBlockBeforeTree : Split the given block right before the given tree // // Arguments: -// block - block that contains the tree (split point) -// stmt - statement that contains the tree -// tree - the tree we're going to use as a split point for given block -// treeUse - pointer to the tree, useful when it's needed to replace it +// block - The block containing the statement. +// stmt - The statement containing the tree. +// splitPoint - A tree inside the statement. +// firstNewStmt - [out] The first new statement that was introduced. +// [firstNewStmt..stmt) are the statements added by this function. +// splitNodeUse - The use of the tree to split at. // // Returns: // The last block after split @@ -4633,10 +4635,10 @@ BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* curr, Statement* st // Notes: // See comments in gtSplitTree // -BasicBlock* Compiler::fgSplitBlockBeforeTree(BasicBlock* block, Statement* stmt, GenTree* tree, GenTree*** treeUse) +BasicBlock* Compiler::fgSplitBlockBeforeTree( + BasicBlock* block, Statement* stmt, GenTree* splitPoint, Statement** firstNewStmt, GenTree*** splitNodeUse) { - Statement* firstNewStmt; - gtSplitTree(block, stmt, tree, &firstNewStmt, treeUse); + gtSplitTree(block, stmt, splitPoint, firstNewStmt, splitNodeUse); BasicBlockFlags originalFlags = block->bbFlags; BasicBlock* prevBb = block; diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 76feafd17de7eb..de8a7473150600 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -165,9 +165,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.testForNull); // Split block right before the call tree - BasicBlock* prevBb = block; - GenTree** callUse = nullptr; - block = fgSplitBlockBeforeTree(block, stmt, call, &callUse); + BasicBlock* prevBb = block; + GenTree** callUse = nullptr; + Statement* newFirstStmt = nullptr; + block = 
fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); assert(prevBb != nullptr && block != nullptr); // Define a local for the result @@ -304,7 +305,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Replace call with rtLookupLclNum local and update side effects *callUse = gtClone(rtLookupLcl); - gtSetEvalOrder(call); + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); From 79b5076e9fe991c0fd432af11b734c5398778b22 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 12 Mar 2023 01:17:32 +0100 Subject: [PATCH 50/63] Update runtimelookup.cpp --- src/coreclr/jit/runtimelookup.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index de8a7473150600..b71f1b223a0369 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -165,10 +165,10 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(runtimeLookup.testForNull); // Split block right before the call tree - BasicBlock* prevBb = block; + BasicBlock* prevBb = block; GenTree** callUse = nullptr; Statement* newFirstStmt = nullptr; - block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); + block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); assert(prevBb != nullptr && block != nullptr); // Define a local for the result From f0f14a74ce32a2ad5a7734c756906289d7b75e71 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 12 Mar 2023 13:42:45 +0100 Subject: [PATCH 51/63] Update src/coreclr/jit/runtimelookup.cpp Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/runtimelookup.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 
b71f1b223a0369..9201e8028bbe33 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -312,8 +312,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); // // Update preds in all new blocks From 2a3d83bceef5727142b7c6f0e613c8d0f18c202b Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 13 Mar 2023 03:24:55 +0100 Subject: [PATCH 52/63] Mitigate some tier0 regressions --- src/coreclr/jit/importer.cpp | 51 ++++++++---------------- src/coreclr/jit/runtimelookup.cpp | 66 +++++++++++++++---------------- 2 files changed, 49 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index ade5e47f1cbb3c..99811ef44c63f6 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1886,41 +1886,22 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); - // Partially inline it later in fgExpandRuntimeLookups. Although, keep the QMARK path below for Tier0 - // as it demonstrates better CQ and TP for Tier0. - if (opts.OptimizationEnabled() || (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)) - { - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - argNode->gtFlags |= GTF_DONT_CSE; - - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) - { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); - } - return helperCall; - } - - // Extract the handle - GenTree* handleForNullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - handleForNullCheck->gtFlags |= GTF_IND_NONFAULTING; - - // Check for null and possibly call helper - GenTree* nullCheck = gtNewOperNode(GT_NE, TYP_INT, handleForNullCheck, gtNewIconNode(0, TYP_I_IMPL)); - GenTree* handleForResult = gtCloneExpr(handleForNullCheck); - - GenTreeColon* colonNullCheck = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, handleForResult, helperCall); - GenTree* result = gtNewQmarkNode(TYP_I_IMPL, nullCheck, colonNullCheck); - - unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling Runtime Lookup tree")); - - impAssignTempGen(tmp, result, CHECK_SPILL_NONE); - return gtNewLclvNode(tmp, TYP_I_IMPL); + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. + argNode->gtFlags |= GTF_DONT_CSE; + + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
+ // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + impInlineRoot()->setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + } + unsigned callLclNum = lvaGrabTemp(true DEBUGARG("spilling helperCall")); + impAssignTempGen(callLclNum, helperCall); + return gtNewLclvNode(callLclNum, helperCall->TypeGet()); } struct RecursiveGuard diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 9201e8028bbe33..8910b6851c6a22 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -68,25 +68,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; - if (!doesMethodHaveExpRuntimeLookup()) - { -#ifdef DEBUG - // To make sure doesMethodHaveExpRuntimeLookup() is not lying to us: - for (BasicBlock* block : Blocks()) - { - for (Statement* stmt : block->Statements()) - { - for (GenTree* tree : stmt->TreeList()) - { - assert(!tree->IsCall() || (tree->IsCall() && !tree->AsCall()->IsExpRuntimeLookup())); - } - } - } -#endif - JITDUMP("Current method doesn't have runtime lookups - bail out.") - return result; - } - INDEBUG(bool irIsPrinted = false); // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag @@ -171,10 +152,39 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); assert(prevBb != nullptr && block != nullptr); - // Define a local for the result - const unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); - lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; - GenTreeLclVar* rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + 
GenTreeLclVar* rtLookupLcl = nullptr; + + // Mostly for Tier0: if the current statement is ASG(LCL, RuntimeLookup) + // we can drop it and use that LCL as the destination + if (stmt->GetRootNode()->OperIs(GT_ASG)) + { + GenTree* lhs = stmt->GetRootNode()->gtGetOp1(); + GenTree* rhs = stmt->GetRootNode()->gtGetOp2(); + if (lhs->OperIs(GT_LCL_VAR) && rhs == *callUse) + { + rtLookupLcl = gtClone(lhs)->AsLclVar(); + fgRemoveStmt(block, stmt); + } + } + + // Grab a temp to store result (it's assigned from either fastPathBb or fallbackBb) + if (rtLookupLcl == nullptr) + { + // Define a local for the result + unsigned rtLookupLclNum = lvaGrabTemp(true DEBUGARG("runtime lookup")); + lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; + rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); + + // Replace call with rtLookupLclNum local and update side effects + *callUse = gtClone(rtLookupLcl); + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + fgMorphStmtBlockOps(block, stmt); + gtUpdateStmtSideEffects(stmt); + } GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); @@ -303,16 +313,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() sizeCheckBb = CreateBlockFromTree(this, prevBb, BBJ_COND, jtrue, debugInfo); } - // Replace call with rtLookupLclNum local and update side effects - *callUse = gtClone(rtLookupLcl); - while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) - { - fgMorphStmtBlockOps(block, newFirstStmt); - newFirstStmt = newFirstStmt->GetNextStmt(); - } - fgMorphStmtBlockOps(block, stmt); - gtUpdateStmtSideEffects(stmt); - // // Update preds in all new blocks // From 07cf9e776c7e3f01ee055ccc24f18f39d7060f3a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 13 Mar 2023 03:33:50 +0100 Subject: [PATCH 53/63] clone fastpath for tier0 --- src/coreclr/jit/runtimelookup.cpp | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 8910b6851c6a22..c8ee12cc2f47dd 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -253,7 +253,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() GenTree* fastPathValue = gtNewOperNode(GT_IND, TYP_I_IMPL, gtCloneExpr(slotPtrTree)); fastPathValue->gtFlags |= GTF_IND_NONFAULTING; // Save dictionary slot to a local (to be used by fast path) - GenTree* fastPathValueClone = fgMakeMultiUse(&fastPathValue); + GenTree* fastPathValueClone = + opts.OptimizationEnabled() ? fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; BasicBlock* nullcheckBb = From 3a3f4ca47b9c76c06ebd1989f94354af749987d1 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 13 Mar 2023 03:44:07 +0100 Subject: [PATCH 54/63] Remove redundant impSpillSideEffects --- src/coreclr/jit/importer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 99811ef44c63f6..f03cccbe342e5d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1879,8 +1879,6 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken assert(pRuntimeLookup->indirections != 0); - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1")); - // Call the helper // - Setup argNode with the pointer to the signature returned by the lookup GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); From 8af93a9c949f595148415816a52ba5b472fb751c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 01:37:03 +0100 Subject: [PATCH 55/63] test --- src/coreclr/jit/runtimelookup.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index c8ee12cc2f47dd..d15975afc09d05 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -24,6 +24,14 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) // Save expression to a local and append it as the last statement in exprBlock static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) { + if ((expr->gtFlags & GTF_GLOB_EFFECT) == 0) + { + GenTree* clone = comp->gtClone(expr, true); + if (clone) + { + return clone; + } + } unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); comp->lvaTable[tmpNum].lvType = expr->TypeGet(); Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr)); From afe2dd44bbc0184e71424fafbb63695751649ec0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 02:17:43 +0100 Subject: [PATCH 56/63] Fix regressions --- src/coreclr/jit/importer.cpp | 120 +++++++++++++++--------------- src/coreclr/jit/runtimelookup.cpp | 8 -- 2 files changed, 58 insertions(+), 70 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f03cccbe342e5d..1fff9332a8e9c6 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1780,15 +1780,36 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken } #endif - // Slot pointer - GenTree* slotPtrTree = ctxTree; - if (pRuntimeLookup->testForNull) { - slotPtrTree = impCloneExpr(ctxTree, &ctxTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, - nullptr DEBUGARG("impRuntimeLookup slot")); + assert(pRuntimeLookup->indirections != 0); + + // Call the helper + // - Setup argNode with the pointer to the signature returned by the lookup + GenTree* argNode = + gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, compileTimeHandle); + GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, 
TYP_I_IMPL, ctxTree, argNode); + + // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after + // "Expand runtime lookups" phase. + argNode->gtFlags |= GTF_DONT_CSE; + + // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. + // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 + impInlineRoot()->setMethodHasExpRuntimeLookup(); + helperCall->SetExpRuntimeLookup(); + if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) + { + JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) + impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); + } + unsigned callLclNum = lvaGrabTemp(true DEBUGARG("spilling helperCall")); + impAssignTempGen(callLclNum, helperCall); + return gtNewLclvNode(callLclNum, helperCall->TypeGet()); } + // Slot pointer + GenTree* slotPtrTree = ctxTree; GenTree* indOffTree = nullptr; GenTree* lastIndOfTree = nullptr; @@ -1834,72 +1855,47 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken } // No null test required - if (!pRuntimeLookup->testForNull) - { - if (pRuntimeLookup->indirections == 0) - { - return slotPtrTree; - } - - slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); - slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; + assert(!pRuntimeLookup->testForNull); - if (!pRuntimeLookup->testForFixup) - { - return slotPtrTree; - } - - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); - - unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); - impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); + if (pRuntimeLookup->indirections == 0) + { + return slotPtrTree; + } - GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - // downcast the pointer to a TYP_INT on 64-bit 
targets - slot = impImplicitIorI4Cast(slot, TYP_INT); - // Use a GT_AND to check for the lowest bit and indirect if it is set - GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); - GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); + slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); + slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - // slot = GT_IND(slot - 1) - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); - GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); - indir->gtFlags |= GTF_IND_NONFAULTING; - indir->gtFlags |= GTF_IND_INVARIANT; + if (!pRuntimeLookup->testForFixup) + { + return slotPtrTree; + } - slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); - GenTree* asg = gtNewAssignNode(slot, indir); - GenTreeColon* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); - GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); - impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0")); - return gtNewLclvNode(slotLclNum, TYP_I_IMPL); - } + unsigned slotLclNum = lvaGrabTemp(true DEBUGARG("impRuntimeLookup test")); + impAssignTempGen(slotLclNum, slotPtrTree, NO_CLASS_HANDLE, CHECK_SPILL_ALL, nullptr, impCurStmtDI); - assert(pRuntimeLookup->indirections != 0); + GenTree* slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + // downcast the pointer to a TYP_INT on 64-bit targets + slot = impImplicitIorI4Cast(slot, TYP_INT); + // Use a GT_AND to check for the lowest bit and indirect if it is set + GenTree* test = gtNewOperNode(GT_AND, TYP_INT, slot, gtNewIconNode(1)); + GenTree* relop = gtNewOperNode(GT_EQ, TYP_INT, test, gtNewIconNode(0)); - // Call the helper - // - Setup argNode with the pointer to the signature returned by the lookup - GenTree* argNode = gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_GLOBAL_PTR, 
compileTimeHandle); - GenTreeCall* helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, ctxTree, argNode); + // slot = GT_IND(slot - 1) + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* add = gtNewOperNode(GT_ADD, TYP_I_IMPL, slot, gtNewIconNode(-1, TYP_I_IMPL)); + GenTree* indir = gtNewOperNode(GT_IND, TYP_I_IMPL, add); + indir->gtFlags |= GTF_IND_NONFAULTING; + indir->gtFlags |= GTF_IND_INVARIANT; - // No need to perform CSE/hoisting for signature node - it is expected to end up in a rarely-taken block after - // "Expand runtime lookups" phase. - argNode->gtFlags |= GTF_DONT_CSE; + slot = gtNewLclvNode(slotLclNum, TYP_I_IMPL); + GenTree* asg = gtNewAssignNode(slot, indir); + GenTreeColon* colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), asg); + GenTreeQmark* qmark = gtNewQmarkNode(TYP_VOID, relop, colon); + impAppendTree(qmark, CHECK_SPILL_NONE, impCurStmtDI); - // Leave a note that this method has runtime lookups we might want to expand (nullchecks, size checks) later. 
- // We can also consider marking current block as a runtime lookup holder to improve TP for Tier0 - impInlineRoot()->setMethodHasExpRuntimeLookup(); - helperCall->SetExpRuntimeLookup(); - if (!impInlineRoot()->GetSignatureToLookupInfoMap()->Lookup(pRuntimeLookup->signature)) - { - JITDUMP("Registering %p in SignatureToLookupInfoMap\n", pRuntimeLookup->signature) - impInlineRoot()->GetSignatureToLookupInfoMap()->Set(pRuntimeLookup->signature, *pRuntimeLookup); - } - unsigned callLclNum = lvaGrabTemp(true DEBUGARG("spilling helperCall")); - impAssignTempGen(callLclNum, helperCall); - return gtNewLclvNode(callLclNum, helperCall->TypeGet()); + return gtNewLclvNode(slotLclNum, TYP_I_IMPL); } struct RecursiveGuard diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index d15975afc09d05..c8ee12cc2f47dd 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -24,14 +24,6 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) // Save expression to a local and append it as the last statement in exprBlock static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) { - if ((expr->gtFlags & GTF_GLOB_EFFECT) == 0) - { - GenTree* clone = comp->gtClone(expr, true); - if (clone) - { - return clone; - } - } unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); comp->lvaTable[tmpNum].lvType = expr->TypeGet(); Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr)); From 7909435eaf167c7dcb73c90ddbb68315a20d990d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 12:11:22 +0100 Subject: [PATCH 57/63] fix tp regressions --- src/coreclr/jit/runtimelookup.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index c8ee12cc2f47dd..bbc6c9e9c497e6 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ 
b/src/coreclr/jit/runtimelookup.cpp @@ -68,6 +68,13 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { PhaseStatus result = PhaseStatus::MODIFIED_NOTHING; + if (!doesMethodHaveExpRuntimeLookup()) + { + // The method being compiled doesn't have expandable runtime lookups. If it does + // and doesMethodHaveExpRuntimeLookup() still returns false we'll assert in LowerCall + return result; + } + INDEBUG(bool irIsPrinted = false); // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag From 258cbae20b22b8fd3a32b4396f4b757e6648e9ee Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 14 Mar 2023 14:51:53 +0100 Subject: [PATCH 58/63] Apply suggestions from code review Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/runtimelookup.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index bbc6c9e9c497e6..e2ed094093c888 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -26,11 +26,10 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB { unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); comp->lvaTable[tmpNum].lvType = expr->TypeGet(); - Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr)); - asgStmt->SetDebugInfo(debugInfo); + Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr), debugInfo); comp->gtSetStmtInfo(asgStmt); comp->fgSetStmtSeq(asgStmt); - return comp->gtNewLclvNode(tmpNum, expr->TypeGet()); + return comp->gtNewLclvNode(tmpNum, genActualType(expr)); }; // Create block from the given tree @@ -40,13 +39,8 @@ static BasicBlock* CreateBlockFromTree( // Fast-path basic block BasicBlock* newBlock = comp->fgNewBBafter(blockKind, insertAfter, true); newBlock->bbFlags |= BBF_INTERNAL; - comp->gtSetEvalOrder(tree); - Statement* stmt = comp->fgNewStmtFromTree(tree); + Statement* stmt = 
comp->fgNewStmtFromTree(tree, debugInfo); comp->fgInsertStmtAtEnd(newBlock, stmt); - stmt->SetDebugInfo(debugInfo); - comp->gtSetStmtInfo(stmt); - comp->fgSetStmtSeq(stmt); - comp->gtUpdateStmtSideEffects(stmt); newBlock->bbCodeOffs = insertAfter->bbCodeOffsEnd; newBlock->bbCodeOffsEnd = insertAfter->bbCodeOffsEnd; return newBlock; From 1afb6f85a65386b4b995041d7d9461db61f89006 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 14:55:22 +0100 Subject: [PATCH 59/63] Address feedback --- src/coreclr/jit/runtimelookup.cpp | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index e2ed094093c888..f09ec8d39e5928 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -24,9 +24,8 @@ static void* GetConstantPointer(Compiler* comp, GenTree* tree) // Save expression to a local and append it as the last statement in exprBlock static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprBlock, DebugInfo& debugInfo) { - unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); - comp->lvaTable[tmpNum].lvType = expr->TypeGet(); - Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr), debugInfo); + unsigned const tmpNum = comp->lvaGrabTemp(true DEBUGARG("spilling expr")); + Statement* asgStmt = comp->fgNewStmtAtEnd(exprBlock, comp->gtNewTempAssign(tmpNum, expr), debugInfo); comp->gtSetStmtInfo(asgStmt); comp->fgSetStmtSeq(asgStmt); return comp->gtNewLclvNode(tmpNum, genActualType(expr)); @@ -69,8 +68,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() return result; } - INDEBUG(bool irIsPrinted = false); - // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { @@ -119,17 +116,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() continue; } -#ifdef DEBUG - // Print full IR before any changes we're goint to make - if 
(!irIsPrinted && verbose) - { - irIsPrinted = true; - printf("\n*************** Before fgExpandRuntimeLookups()\n"); - fgDispBasicBlocks(true); - printf("\n"); - } -#endif - // Restore runtimeLookup using signature argument via a global dictionary CORINFO_RUNTIME_LOOKUP runtimeLookup = {}; const bool lookupFound = GetSignatureToLookupInfoMap()->Lookup(signature, &runtimeLookup); @@ -415,14 +401,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgReorderBlocks(/* useProfileData */ false); fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); } - -#ifdef DEBUG - if (verbose) - { - printf("\n*************** After fgExpandRuntimeLookups()\n"); - fgDispBasicBlocks(true); - } -#endif } return result; } From a116695630825b22d86878c7625a0f13346b2086 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 15:01:02 +0100 Subject: [PATCH 60/63] Add goto SCAN_BLOCK_AGAIN --- src/coreclr/jit/runtimelookup.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index f09ec8d39e5928..c4a2b7e7d63d64 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -71,6 +71,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag for (BasicBlock* block : Blocks()) { + SCAN_BLOCK_AGAIN: for (Statement* const stmt : block->Statements()) { if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0) @@ -390,6 +391,9 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible // for that result = PhaseStatus::MODIFIED_EVERYTHING; + + // We've modified the graph and the current "block" might still have more runtime lookups + goto SCAN_BLOCK_AGAIN; } } } From 386446e7b148a348a17d4c833da3c5c16f6efcb6 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 14 Mar 2023 15:27:10 +0100 Subject: [PATCH 61/63] Update src/coreclr/jit/runtimelookup.cpp 
Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/runtimelookup.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index c4a2b7e7d63d64..3c36a256e9c7ad 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -140,6 +140,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() block = fgSplitBlockBeforeTree(block, stmt, call, &newFirstStmt, &callUse); assert(prevBb != nullptr && block != nullptr); + // Block ops inserted by the split need to be morphed here since we are after morph. + // We cannot morph stmt yet as we may modify it further below, and the morphing + // could invalidate callUse. + while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) + { + fgMorphStmtBlockOps(block, newFirstStmt); + newFirstStmt = newFirstStmt->GetNextStmt(); + } + GenTreeLclVar* rtLookupLcl = nullptr; // Mostly for Tier0: if the current statement is ASG(LCL, RuntimeLookup) @@ -163,13 +172,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() lvaTable[rtLookupLclNum].lvType = TYP_I_IMPL; rtLookupLcl = gtNewLclvNode(rtLookupLclNum, call->TypeGet()); - // Replace call with rtLookupLclNum local and update side effects *callUse = gtClone(rtLookupLcl); - while ((newFirstStmt != nullptr) && (newFirstStmt != stmt)) - { - fgMorphStmtBlockOps(block, newFirstStmt); - newFirstStmt = newFirstStmt->GetNextStmt(); - } + fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); } From d2f3fec07b8fcbcca94e23a3abe09be7504075d4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 15:58:55 +0100 Subject: [PATCH 62/63] Address feedback --- src/coreclr/jit/runtimelookup.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 3c36a256e9c7ad..2de78b3f195a97 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -69,7 +69,8 
@@ PhaseStatus Compiler::fgExpandRuntimeLookups() } // Find all calls with GTF_CALL_M_EXP_RUNTIME_LOOKUP flag - for (BasicBlock* block : Blocks()) + // We don't use Blocks() iterator here as we modify `block` variable + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) { SCAN_BLOCK_AGAIN: for (Statement* const stmt : block->Statements()) From 19034d911bf8ddabedc4ca4823389ae2aa8d1ac2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 14 Mar 2023 17:12:07 +0100 Subject: [PATCH 63/63] update side effects for fallbackBb --- src/coreclr/jit/runtimelookup.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 2de78b3f195a97..58e7d75ae4ba90 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -32,8 +32,12 @@ static GenTree* SpillExpression(Compiler* comp, GenTree* expr, BasicBlock* exprB }; // Create block from the given tree -static BasicBlock* CreateBlockFromTree( - Compiler* comp, BasicBlock* insertAfter, BBjumpKinds blockKind, GenTree* tree, DebugInfo& debugInfo) +static BasicBlock* CreateBlockFromTree(Compiler* comp, + BasicBlock* insertAfter, + BBjumpKinds blockKind, + GenTree* tree, + DebugInfo& debugInfo, + bool updateSideEffects = false) { // Fast-path basic block BasicBlock* newBlock = comp->fgNewBBafter(blockKind, insertAfter, true); @@ -42,6 +46,10 @@ static BasicBlock* CreateBlockFromTree( comp->fgInsertStmtAtEnd(newBlock, stmt); newBlock->bbCodeOffs = insertAfter->bbCodeOffsEnd; newBlock->bbCodeOffsEnd = insertAfter->bbCodeOffsEnd; + if (updateSideEffects) + { + comp->gtUpdateStmtSideEffects(stmt); + } return newBlock; } @@ -256,7 +264,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Fallback basic block GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); - BasicBlock* fallbackBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo); + 
BasicBlock* fallbackBb = + CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo, true); // Fast-path basic block GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone);