From a5afa332d0bc0f039cabcf39f07178127b1ae024 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 6 Sep 2022 18:02:34 +0200 Subject: [PATCH 01/10] JIT: Support delegate GDV guards in loop cloning * Support cloning loops based on delegate GDV guards * Do flow-graph opts directly in loop cloning instead of relying on RBO to clean it up (for both type and delegate GDV) --- src/coreclr/jit/compiler.h | 9 +- src/coreclr/jit/indirectcalltransformer.cpp | 5 +- src/coreclr/jit/jitconfigvalues.h | 4 +- src/coreclr/jit/loopcloning.cpp | 444 ++++++++++++++------ src/coreclr/jit/loopcloning.h | 167 ++++++-- src/coreclr/jit/loopcloningopts.h | 1 + 6 files changed, 446 insertions(+), 184 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1da2267e5974e3..5a7df2c5666e39 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7413,17 +7413,17 @@ class Compiler Statement* stmt; const unsigned loopNum; const bool cloneForArrayBounds; - const bool cloneForTypeTests; + const bool cloneForGDVTests; LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, Statement* stmt, bool cloneForArrayBounds, - bool cloneForTypeTests) + bool cloneForGDVTests) : context(context) , stmt(nullptr) , loopNum(loopNum) , cloneForArrayBounds(cloneForArrayBounds) - , cloneForTypeTests(cloneForTypeTests) + , cloneForGDVTests(cloneForGDVTests) { } }; @@ -7437,6 +7437,8 @@ class Compiler fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info); bool optObtainLoopCloningOpts(LoopCloneContext* context); bool optIsLoopClonable(unsigned loopInd); + bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info); + bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType); bool optLoopCloningEnabled(); @@ -7444,6 +7446,7 @@ class Compiler void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore); #endif void optPerformStaticOptimizations(unsigned loopNum, 
LoopCloneContext* context DEBUGARG(bool fastPath)); + void optPerformFlowGraphOptimizations(unsigned loopNum, LoopCloneContext* context); bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context); bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context); BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index e3d799f734b8ba..6ccbe7f3ab1a64 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -546,7 +546,8 @@ class IndirectCallTransformer // Create temp for this if the tree is costly. if (thisTree->IsLocal()) { - thisTree = compiler->gtCloneExpr(thisTree); + thisTree = compiler->gtCloneExpr(thisTree); + LclVarDsc* dsc = compiler->lvaGetDesc(thisTree->AsLclVarCommon()); } else { @@ -594,7 +595,7 @@ class IndirectCallTransformer // TODO-GDV: Consider duplicating the store at the end of the // cold case for the previous GDV. Then we can reuse the target // if the second check of a chained GDV fails. - bool reuseTarget = (origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0; + bool reuseTarget = false; //(origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0; if (origCall->IsVirtualVtable()) { GenTree* tarTree = compiler->fgExpandVirtualVtableCallTarget(origCall); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 5690f7a4cdc7ac..603c451fd7a8b2 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -41,8 +41,8 @@ CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0) CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. 
-CONFIG_INTEGER(JitCloneLoopsWithTypeTests, W("JitCloneLoopsWithTypeTests"), 1) // If 0, don't clone loops based on - // invariant type tests +CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on + // invariant type/method address tests CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning // optimizations are performed on the fast path. CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index b0027ad6717106..e753521a2d73ac 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -131,18 +131,40 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) return comp->gtNewIconNode(constant); case Var: return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); - case ArrLen: - return arrLen.ToGenTree(comp, bb); + case ArrAccess: + return arrAccess.ToGenTree(comp, bb); case Null: return comp->gtNewIconNode(0, TYP_REF); case ClassHandle: return comp->gtNewIconHandleNode((size_t)clsHnd, GTF_ICON_CLASS_HDL); - case Indir: + case IndirOfLocal: { - GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode(lclNum, TYP_REF)); + GenTree* addr = comp->gtNewLclvNode(lclNum, TYP_REF); + if (indirOffs != 0) + { + addr = comp->gtNewOperNode(GT_ADD, TYP_BYREF, addr, + comp->gtNewIconNode(static_cast(indirOffs), TYP_I_IMPL)); + } + + GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, addr); indir->gtFlags |= GTF_IND_INVARIANT; return indir; } + case MethodAddr: + { + GenTreeIntCon* methodAddrHandle = comp->gtNewIconHandleNode((size_t)methAddr, GTF_ICON_FTN_ADDR); + INDEBUG(methodAddrHandle->gtTargetHandle = (size_t)targetMethHnd); + return methodAddrHandle; + } + case IndirOfMethodAddrSlot: + { + GenTreeIntCon* slot = 
comp->gtNewIconHandleNode((size_t)methAddr, GTF_ICON_FTN_ADDR); + INDEBUG(slot->gtTargetHandle = (size_t)targetMethHnd); + GenTree* indir = comp->gtNewIndir(TYP_I_IMPL, slot); + indir->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT; + indir->gtFlags &= ~GTF_EXCEPT; + return indir; + } default: assert(!"Could not convert LC_Ident to GenTree"); unreached(); @@ -937,7 +959,7 @@ void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStackPush( - LC_Condition(GT_NE, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(LC_Ident::Null)))); + LC_Condition(GT_NE, LC_Expr(LC_Ident::CreateVar(Lcl())), LC_Expr(LC_Ident::CreateNull()))); } else { @@ -946,12 +968,13 @@ void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStackPush( - LC_Condition(GT_LT, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(arrLen)))); + LC_Condition(GT_LT, LC_Expr(LC_Ident::CreateVar(Lcl())), LC_Expr(LC_Ident::CreateArrAccess(arrLen)))); // Push condition (a[i] != null) LC_Array arrTmp = array; arrTmp.dim = level; - (*conds)[level * 2]->Push(LC_Condition(GT_NE, LC_Expr(LC_Ident(arrTmp)), LC_Expr(LC_Ident(LC_Ident::Null)))); + (*conds)[level * 2]->Push( + LC_Condition(GT_NE, LC_Expr(LC_Ident::CreateArrAccess(arrTmp)), LC_Expr(LC_Ident::CreateNull()))); } // Invoke on the children recursively. 
@@ -1073,14 +1096,31 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext case LcOptInfo::LcTypeTest: { LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); - LC_Ident objDeref = LC_Ident(ttInfo->lclNum, LC_Ident::Indir); - LC_Ident methodTable = LC_Ident(ttInfo->clsHnd, LC_Ident::ClassHandle); + LC_Ident objDeref = LC_Ident::CreateIndirOfLocal(ttInfo->lclNum, 0); + LC_Ident methodTable = LC_Ident::CreateClassHandle(ttInfo->clsHnd); LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); context->EnsureObjDerefs(loopNum)->Push(objDeref); context->EnsureConditions(loopNum)->Push(cond); break; } + case LcOptInfo::LcMethodAddrTest: + { + LcMethodAddrTestOptInfo* test = optInfo->AsLcMethodAddrTestOptInfo(); + LC_Ident objDeref = + LC_Ident::CreateIndirOfLocal(test->delegateLclNum, eeGetEEInfo()->offsetOfDelegateFirstTarget); + LC_Ident methAddr; + if (test->isSlot) + methAddr = LC_Ident::CreateIndirMethodAddrSlot(test->methAddr DEBUG_ARG(test->targetMethHnd)); + else + methAddr = LC_Ident::CreateMethodAddr(test->methAddr DEBUG_ARG(test->targetMethHnd)); + LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); + + context->EnsureObjDerefs(loopNum)->Push(objDeref); + context->EnsureConditions(loopNum)->Push(cond); + break; + } + default: JITDUMP("Unknown opt\n"); return false; @@ -1140,7 +1180,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (!isIncreasingLoop) { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(static_cast(loop->lpConstInit), LC_Ident::Const); + ident = LC_Ident::CreateConst(static_cast(loop->lpConstInit)); } } else @@ -1156,14 +1196,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Condition geZero; if (isIncreasingLoop) { - geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + geZero = 
LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(initLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } else { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(initLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + ident = LC_Ident::CreateVar(initLcl); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } context->EnsureConditions(loopNum)->Push(geZero); } @@ -1181,7 +1220,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (isIncreasingLoop) { // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(static_cast(limit), LC_Ident::Const); + ident = LC_Ident::CreateConst(static_cast(limit)); } } else if (loop->lpFlags & LPFLG_VAR_LIMIT) @@ -1197,13 +1236,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (isIncreasingLoop) { // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(limitLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + ident = LC_Ident::CreateVar(limitLcl); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } else { - geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(limitLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } context->EnsureConditions(loopNum)->Push(geZero); @@ -1216,7 +1254,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext JITDUMP("> ArrLen not matching\n"); return false; } - ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); + ident = LC_Ident::CreateArrAccess(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); // Ensure that this array must be dereference-able, 
before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); @@ -1259,7 +1297,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext { LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); - LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); @@ -1273,7 +1311,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); LC_Array arrLen(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), mdArrInfo->dim, LC_Array::None)); - LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); @@ -1513,8 +1551,8 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // ObjDeref array has indir(lcl), we want lcl. // LC_Ident& mtIndirIdent = (*objDeref)[i]; - LC_Ident ident(mtIndirIdent.LclNum(), LC_Ident::Var); - (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Ident::Null)))); + LC_Ident ident = LC_Ident::CreateVar(mtIndirIdent.LclNum()); + (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident::CreateNull()))); } } @@ -1633,9 +1671,52 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. 
break; + + default: + break; + } + } +} + +void Compiler::optPerformFlowGraphOptimizations(unsigned loopNum, LoopCloneContext* context) +{ + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + assert(optInfos != nullptr); + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* optInfo = optInfos->Get(i); + switch (optInfo->GetOptType()) + { case LcOptInfo::LcTypeTest: - // We could optimize here. For now, let downstream opts clean this up. + case LcOptInfo::LcMethodAddrTest: + { + BasicBlock* guardBlock; + if (optInfo->GetOptType() == LcOptInfo::LcTypeTest) + { + guardBlock = optInfo->AsLcTypeTestOptInfo()->guardBlock; + } + else + { + guardBlock = optInfo->AsLcMethodAddrTestOptInfo()->guardBlock; + } + + GenTree* jtrue = guardBlock->lastStmt()->GetRootNode(); + assert(jtrue->OperIs(GT_JTRUE) && jtrue->gtGetOp1()->OperIs(GT_EQ, GT_NE)); + bool hotIsTrue = jtrue->gtGetOp1()->OperIs(GT_EQ); + if (hotIsTrue) + { + guardBlock->bbJumpKind = BBJ_ALWAYS; + fgRemoveRefPred(guardBlock->bbNext, guardBlock); + } + else + { + guardBlock->bbJumpKind = BBJ_NONE; + fgRemoveRefPred(guardBlock->bbJumpDest, guardBlock); + } + + fgRemoveStmt(guardBlock, guardBlock->lastStmt()); break; + } default: break; @@ -2232,6 +2313,10 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) } #endif // DEBUG + // Now that we have redirected all blocks in the slow loop we can apply FG + // changing opts to the fast loop. + optPerformFlowGraphOptimizations(loopInd, context); + // Insert the loop choice conditions. We will create the following structure: // // h (fall through) @@ -2718,10 +2803,11 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_SKIP_SUBTREES; } - if (info->cloneForTypeTests && tree->OperIs(GT_JTRUE)) + if (info->cloneForGDVTests && tree->OperIs(GT_JTRUE)) { - JITDUMP("...TT considering [%06u]\n", dspTreeID(tree)); - // Look for invariant type tests. 
+ JITDUMP("...GDV considering [%06u]\n", dspTreeID(tree)); + assert(info->stmt->GetRootNode() == tree); + // Look for invariant type/method address tests. // GenTree* const relop = tree->AsOp()->gtGetOp1(); @@ -2735,141 +2821,237 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - // One side or the other must be an indir - // The other must be a loop invariant. - // Currently, we'll just look for a constant. + // One side or the other must be an indir and the other must be a loop + // invariant. Currently, we'll just look for a constant or indir of a + // constant. Start out by normalizing it to the right. // - bool match = false; - if (relopOp1->OperIs(GT_IND) && relopOp2->IsIntegralConst()) - { - match = true; - } - else if (relopOp2->OperIs(GT_IND) && relopOp1->IsIntegralConst()) + if (optIsHandleOrIndirOfHandle(relopOp1, GTF_ICON_CLASS_HDL) || + optIsHandleOrIndirOfHandle(relopOp1, GTF_ICON_FTN_ADDR)) { std::swap(relopOp1, relopOp2); - match = true; } - if (!match) + if (!relopOp1->OperIs(GT_IND) || !relopOp1->TypeIs(TYP_I_IMPL, TYP_REF, TYP_BYREF)) { return WALK_CONTINUE; } - // The indir addr must be loop invariant TYP_REF local - // - GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); - - if (!indirAddr->TypeIs(TYP_REF)) + if (relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) { - return WALK_CONTINUE; - } + // The indir addr must be loop invariant TYP_REF local + // + GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); - if (!indirAddr->OperIs(GT_LCL_VAR)) - { - return WALK_CONTINUE; - } + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } - if (!relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) - { - return WALK_CONTINUE; - } + if (!indirAddr->OperIs(GT_LCL_VAR)) + { + return WALK_CONTINUE; + } - GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); - const unsigned lclNum = indirAddrLcl->GetLclNum(); + 
GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); + const unsigned lclNum = indirAddrLcl->GetLclNum(); - JITDUMP("... right form, V%02u\n", lclNum); + JITDUMP("... right form for type test with local V%02u\n", lclNum); - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) - { - JITDUMP("... but not invariant\n"); - return WALK_CONTINUE; - } + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... but not invariant\n"); + return WALK_CONTINUE; + } - // Looks like we found an invariant type test. - // - JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u ... ", info->loopNum, dspTreeID(tree), lclNum); + // Looks like we found an invariant type test. + // + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loopNum, dspTreeID(tree), + lclNum); - // We only want this type test to inspire cloning if - // - // (1) we have profile data - // (2) the loop iterates frequently each time the method is called - // (3) the type test is frequently hit during the loop iteration - // (4) the type test is biased and highly likely to succeed - // - const LoopDsc& loopDsc = optLoopTable[info->loopNum]; - BasicBlock* const loopEntry = loopDsc.lpEntry; - BasicBlock* const typeTestBlock = compCurBB; - double const loopFrequency = 0.50; - double const typeTestFrequency = 0.50; - double const typeTestBias = 0.05; - - // Check for (1) - // - if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) - { - JITDUMP(" but loop does not have profile data.\n"); - return WALK_CONTINUE; - } + if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) + { + // Update the loop context. 
+ // + assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); - // Check for (2) - // - if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) - { - JITDUMP(" but loop does not iterate often enough.\n"); - return WALK_CONTINUE; + assert(compCurBB->lastStmt() == info->stmt); + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd, compCurBB)); + } } - - // Check for (3) - // - if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR)) { - JITDUMP(" but type test does not execute often enough within the loop.\n"); - return WALK_CONTINUE; - } + // The indir addr must be loop invariant TYP_REF local + // + GenTree* indirAddr = relopOp1->AsIndir()->Addr(); + + // ▌ JTRUE void + // └──▌ NE int + // ├──▌ CNS_INT(h) long 0x7ffdb1fa4a08 ftn + // └──▌ IND long + // └──▌ ADD byref <- Matching this tree + // ├──▌ LCL_VAR ref V00 arg0 + // └──▌ CNS_INT long 24 + + // We expect indirections of the 'target' fields. Currently we + // support only the simple cases (one target address, i.e. no + // shuffle thunk/unboxing stubs). + + ssize_t offset = 0; + if (indirAddr->OperIs(GT_ADD)) + { + if (!indirAddr->gtGetOp2()->IsCnsIntOrI() || !indirAddr->gtGetOp2()->TypeIs(TYP_I_IMPL) || + indirAddr->gtGetOp2()->IsIconHandle()) + { + return WALK_CONTINUE; + } - // Check for (4) - // - BasicBlock* const hotSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; - BasicBlock* const coldSuccessor = relop->OperIs(GT_EQ) ? 
typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + offset = indirAddr->gtGetOp2()->AsIntConCommon()->IconValue(); + indirAddr = indirAddr->gtGetOp1(); + } - if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) - { - JITDUMP(" but type test successor blocks were not profiled.\n"); - return WALK_CONTINUE; - } + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } - if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) - { - JITDUMP(" but hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); - return WALK_CONTINUE; - } + if (!indirAddr->OperIs(GT_LCL_VAR)) + { + return WALK_CONTINUE; + } - if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) - { - const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + if (offset != eeGetEEInfo()->offsetOfDelegateFirstTarget) + { + return WALK_CONTINUE; + } + + unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum(); + + JITDUMP("... right form for method address test with local V%02u\n", lclNum); - if (bias > typeTestBias) + LclVarDsc* dsc = lvaGetDesc(lclNum); + if (dsc->lvClassHnd == NO_CLASS_HANDLE) { - JITDUMP(" but type test not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, - typeTestBias); + JITDUMP("... but no class handle available for local\n"); return WALK_CONTINUE; } - } - JITDUMP(" passed profile screening\n"); + unsigned attribs = this->info.compCompHnd->getClassAttribs(dsc->lvClassHnd); + if ((attribs & CORINFO_FLG_DELEGATE) == 0) + { + JITDUMP("... but not a delegate instance\n"); + return WALK_CONTINUE; + } - // Update the loop context. - // - assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... 
but not invariant\n"); + return WALK_CONTINUE; + } - info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); + JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loopNum, + dspTreeID(tree), lclNum); + + if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) + { + // Update the loop context. + // + GenTreeIntCon* iconHandle = + relopOp2->IsIconHandle() ? relopOp2->AsIntCon() : relopOp2->AsIndir()->Addr()->AsIntCon(); + assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR)); + assert(compCurBB->lastStmt() == info->stmt); + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) + LcMethodAddrTestOptInfo(lclNum, (void*)iconHandle->IconValue(), relopOp2 != iconHandle, + compCurBB DEBUG_ARG( + (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle))); + } + } } return WALK_CONTINUE; } +bool Compiler::optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType) +{ + return tree->OperIs(GT_IND) ? 
tree->AsIndir()->Addr()->IsIconHandle(handleType) : tree->IsIconHandle(handleType); +} + +bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info) +{ + JITDUMP("Checking whether cloning is profitable ...\n"); + // We only want GDV tests to inspire cloning if + // + // (1) we have profile data + // (2) the loop iterates frequently each time the method is called + // (3) the test is frequently hit during the loop iteration + // (4) the test is biased and highly likely to succeed + // + const LoopDsc& loopDsc = optLoopTable[info->loopNum]; + BasicBlock* const loopEntry = loopDsc.lpEntry; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; + + // Check for (1) + // + if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + { + JITDUMP(" No; loop does not have profile data.\n"); + return false; + } + + // Check for (2) + // + if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + { + JITDUMP(" No; loop does not iterate often enough.\n"); + return false; + } + + // Check for (3) + // + if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + { + JITDUMP(" No; guard does not execute often enough within the loop.\n"); + return false; + } + + // Check for (4) + // + BasicBlock* const hotSuccessor = guard->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; + BasicBlock* const coldSuccessor = guard->OperIs(GT_EQ) ? 
typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + + if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) + { + JITDUMP(" No; guard successor blocks were not profiled.\n"); + return false; + } + + if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) + { + JITDUMP(" No; guard hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); + return false; + } + + if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) + { + const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + + if (bias > typeTestBias) + { + JITDUMP(" No; guard not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, + typeTestBias); + return false; + } + } + + JITDUMP(" Yes\n"); + return true; +} + /* static */ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data) { @@ -2908,16 +3090,16 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex } bool shouldCloneForArrayBounds = canCloneForArrayBounds; - bool shouldCloneForTypeTests = canCloneForTypeTests; + bool shouldCloneForGdvTests = canCloneForTypeTests; #ifdef DEBUG - shouldCloneForTypeTests &= JitConfig.JitCloneLoopsWithTypeTests() != 0; + shouldCloneForGdvTests &= JitConfig.JitCloneLoopsWithGdvTests() != 0; #endif JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, - shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForTypeTests ? " (type tests)" : ""); + shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForTypeTests); + LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); for (BasicBlock* const block : loop.LoopBlocks()) { compCurBB = block; diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 103454a89ce95a..18d997770e60f8 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -324,9 +324,35 @@ struct LcTypeTestOptInfo : public LcOptInfo unsigned lclNum; // handle being tested for CORINFO_CLASS_HANDLE clsHnd; + BasicBlock* guardBlock; - LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) - : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd, BasicBlock* guardBlock) + : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd), guardBlock(guardBlock) + { + } +}; + +struct LcMethodAddrTestOptInfo : public LcOptInfo +{ + // Invariant local whose target field(s) are tested + unsigned delegateLclNum; + // Invariant tree representing method address on the other side of the test + void* methAddr; + bool isSlot; + BasicBlock* guardBlock; +#ifdef DEBUG + CORINFO_METHOD_HANDLE targetMethHnd; +#endif + + LcMethodAddrTestOptInfo(unsigned delegateLclNum, + void* methAddr, + bool isSlot, + BasicBlock* guardBlock DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd)) + : LcOptInfo(LcMethodAddrTest) + , delegateLclNum(delegateLclNum) + , methAddr(methAddr) + , isSlot(isSlot) + , guardBlock(guardBlock) DEBUG_ARG(targetMethHnd(targetMethHnd)) { } }; @@ -423,9 +449,7 @@ struct LC_Array }; //------------------------------------------------------------------------ -// LC_Ident: symbolic representation of either a constant like 1 or 2, -// or a variable like V02 or V03, or an "LC_Array", or the null constant, -// or a class handle, or an indir of a variable like *V02. 
+// LC_Ident: symbolic representation of "a value" // struct LC_Ident { @@ -434,24 +458,45 @@ struct LC_Ident Invalid, Const, Var, - ArrLen, + ArrAccess, Null, ClassHandle, - Indir, + IndirOfLocal, + MethodAddr, + IndirOfMethodAddrSlot, }; private: union { - unsigned constant; - unsigned lclNum; - LC_Array arrLen; + unsigned constant; + struct + { + unsigned lclNum; + unsigned indirOffs; + }; + LC_Array arrAccess; CORINFO_CLASS_HANDLE clsHnd; + struct + { + void* methAddr; +#ifdef DEBUG + CORINFO_METHOD_HANDLE targetMethHnd; // for nice disassembly +#endif + }; }; + LC_Ident(IdentType type) : type(type) + { + } + public: // The type of this object IdentType type; + LC_Ident() : type(Invalid) + { + } + // Equality operator bool operator==(const LC_Ident& that) const { @@ -467,12 +512,17 @@ struct LC_Ident case ClassHandle: return (clsHnd == that.clsHnd); case Var: - case Indir: return (lclNum == that.lclNum); - case ArrLen: - return (arrLen == that.arrLen); + case IndirOfLocal: + return (lclNum == that.lclNum) && (indirOffs == that.indirOffs); + case ArrAccess: + return (arrAccess == that.arrAccess); case Null: return true; + case MethodAddr: + return (methAddr == that.methAddr); + case IndirOfMethodAddrSlot: + return (methAddr == that.methAddr); default: assert(!"Unknown LC_Ident type"); unreached(); @@ -481,7 +531,7 @@ struct LC_Ident unsigned LclNum() const { - assert((type == Var) || (type == Indir)); + assert((type == Var) || (type == IndirOfLocal)); return lclNum; } @@ -496,18 +546,27 @@ struct LC_Ident case Var: printf("V%02u", lclNum); break; - case Indir: - printf("*V%02u", lclNum); + case IndirOfLocal: + if (indirOffs != 0) + printf("*(V%02u + %x)", lclNum, indirOffs); + else + printf("*V%02u", lclNum); break; case ClassHandle: printf("%p", clsHnd); break; - case ArrLen: - arrLen.Print(); + case ArrAccess: + arrAccess.Print(); break; case Null: printf("null"); break; + case MethodAddr: + printf("%p", methAddr); + break; + case IndirOfMethodAddrSlot: + 
printf("[%p]", methAddr); + break; default: printf("INVALID"); break; @@ -515,49 +574,65 @@ struct LC_Ident } #endif - LC_Ident() : type(Invalid) + // Convert this symbolic representation into a tree node. + GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); + + static LC_Ident CreateVar(unsigned lclNum) { + LC_Ident id(Var); + id.lclNum = lclNum; + return id; } - explicit LC_Ident(unsigned val, IdentType type) : type(type) + static LC_Ident CreateIndirOfLocal(unsigned lclNum, unsigned offs) { - if (type == Const) - { - constant = val; - } - else if ((type == Var) || (type == Indir)) - { - lclNum = val; - } - else - { - unreached(); - } + LC_Ident id(IndirOfLocal); + id.lclNum = lclNum; + id.indirOffs = offs; + return id; } - explicit LC_Ident(CORINFO_CLASS_HANDLE val, IdentType type) : type(type) + static LC_Ident CreateConst(unsigned value) { - if (type == ClassHandle) - { - clsHnd = val; - } - else - { - unreached(); - } + LC_Ident id(Const); + id.constant = value; + return id; } - explicit LC_Ident(IdentType type) : type(type) + static LC_Ident CreateArrAccess(const LC_Array& arrLen) { - assert(type == Null); + LC_Ident id(ArrAccess); + id.arrAccess = arrLen; + return id; } - explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen) + static LC_Ident CreateNull() { + return LC_Ident(Null); } - // Convert this symbolic representation into a tree node. 
- GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); + static LC_Ident CreateClassHandle(CORINFO_CLASS_HANDLE clsHnd) + { + LC_Ident id(ClassHandle); + id.clsHnd = clsHnd; + return id; + } + + static LC_Ident CreateMethodAddr(void* methAddr DEBUG_ARG(CORINFO_METHOD_HANDLE methHnd)) + { + LC_Ident id(MethodAddr); + id.methAddr = methAddr; + INDEBUG(id.targetMethHnd = methHnd); + return id; + } + + static LC_Ident CreateIndirMethodAddrSlot(void* methAddrSlot DEBUG_ARG(CORINFO_METHOD_HANDLE methHnd)) + { + LC_Ident id(IndirOfMethodAddrSlot); + id.methAddr = methAddrSlot; + INDEBUG(id.targetMethHnd = methHnd); + return id; + } }; /** diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index 2df5e7baf63b5f..2fb13937e2f86a 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ b/src/coreclr/jit/loopcloningopts.h @@ -12,5 +12,6 @@ LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) LC_OPT(LcTypeTest) +LC_OPT(LcMethodAddrTest) #undef LC_OPT From 9867d1d1fb86f29ef2d00ab20e83f6f73526bd06 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 6 Sep 2022 18:56:23 +0200 Subject: [PATCH 02/10] Minor clean ups --- src/coreclr/jit/indirectcalltransformer.cpp | 1 - src/coreclr/jit/loopcloning.cpp | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 6ccbe7f3ab1a64..f8a53d4eb7e07b 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -547,7 +547,6 @@ class IndirectCallTransformer if (thisTree->IsLocal()) { thisTree = compiler->gtCloneExpr(thisTree); - LclVarDsc* dsc = compiler->lvaGetDesc(thisTree->AsLclVarCommon()); } else { diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index e753521a2d73ac..c043aaa7cc790a 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2821,7 +2821,7 @@ Compiler::fgWalkResult 
Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - // One side or the other must be an indir and the other must be a loop + // One side or the other must be an indir and the other must be loop // invariant. Currently, we'll just look for a constant or indir of a // constant. Start out by normalizing it to the right. // @@ -2882,8 +2882,6 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop } else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR)) { - // The indir addr must be loop invariant TYP_REF local - // GenTree* indirAddr = relopOp1->AsIndir()->Addr(); // ▌ JTRUE void From 3069d1229ba7e5cbe321656f9e0a3dc542ac61a6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 6 Sep 2022 19:18:32 +0200 Subject: [PATCH 03/10] Fix x86 build --- src/coreclr/jit/loopcloning.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index c043aaa7cc790a..96e586ddf4a018 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2919,7 +2919,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } - if (offset != eeGetEEInfo()->offsetOfDelegateFirstTarget) + if (offset != static_cast<ssize_t>(eeGetEEInfo()->offsetOfDelegateFirstTarget)) { return WALK_CONTINUE; } From 1c34ec1daeb3242cfa13825264452e1d03782cec Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Sep 2022 13:47:21 +0200 Subject: [PATCH 04/10] Run jit-format --- src/coreclr/jit/indirectcalltransformer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index f8a53d4eb7e07b..7486d703a5592b 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@
-546,7 +546,7 @@ class IndirectCallTransformer // Create temp for this if the tree is costly. if (thisTree->IsLocal()) { - thisTree = compiler->gtCloneExpr(thisTree); + thisTree = compiler->gtCloneExpr(thisTree); } else { From 17169ab6fbdc3a2fd344f82ef1782992dfc8196a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Sep 2022 17:04:36 +0200 Subject: [PATCH 05/10] Remove FG opts Rely on RBO like type test opt. --- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/indirectcalltransformer.cpp | 1 + src/coreclr/jit/loopcloning.cpp | 50 --------------------- 3 files changed, 1 insertion(+), 51 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5a7df2c5666e39..9535b1af5bbb9d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7446,7 +7446,6 @@ class Compiler void optDebugLogLoopCloning(BasicBlock* block, Statement* insertBefore); #endif void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath)); - void optPerformFlowGraphOptimizations(unsigned loopNum, LoopCloneContext* context); bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context); bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context); BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context, diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 7486d703a5592b..fa90082a148bff 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -638,6 +638,7 @@ class IndirectCallTransformer TYP_I_IMPL); GenTree* tarTree = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, thisTree, offset); tarTree = compiler->gtNewIndir(TYP_I_IMPL, tarTree); + tarTree->gtFlags |= GTF_IND_INVARIANT; if (reuseTarget) { diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 96e586ddf4a018..d5d7a48fabcbc9 100644 --- 
a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1678,52 +1678,6 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* } } -void Compiler::optPerformFlowGraphOptimizations(unsigned loopNum, LoopCloneContext* context) -{ - JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum); - assert(optInfos != nullptr); - for (unsigned i = 0; i < optInfos->Size(); ++i) - { - LcOptInfo* optInfo = optInfos->Get(i); - switch (optInfo->GetOptType()) - { - case LcOptInfo::LcTypeTest: - case LcOptInfo::LcMethodAddrTest: - { - BasicBlock* guardBlock; - if (optInfo->GetOptType() == LcOptInfo::LcTypeTest) - { - guardBlock = optInfo->AsLcTypeTestOptInfo()->guardBlock; - } - else - { - guardBlock = optInfo->AsLcMethodAddrTestOptInfo()->guardBlock; - } - - GenTree* jtrue = guardBlock->lastStmt()->GetRootNode(); - assert(jtrue->OperIs(GT_JTRUE) && jtrue->gtGetOp1()->OperIs(GT_EQ, GT_NE)); - bool hotIsTrue = jtrue->gtGetOp1()->OperIs(GT_EQ); - if (hotIsTrue) - { - guardBlock->bbJumpKind = BBJ_ALWAYS; - fgRemoveRefPred(guardBlock->bbNext, guardBlock); - } - else - { - guardBlock->bbJumpKind = BBJ_NONE; - fgRemoveRefPred(guardBlock->bbJumpDest, guardBlock); - } - - fgRemoveStmt(guardBlock, guardBlock->lastStmt()); - break; - } - - default: - break; - } - } -} - //---------------------------------------------------------------------------- // optIsLoopClonable: Determine whether this loop can be cloned. // @@ -2313,10 +2267,6 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) } #endif // DEBUG - // Now that we have redirected all blocks in the slow loop we can apply FG - // changing opts to the fast loop. - optPerformFlowGraphOptimizations(loopInd, context); - // Insert the loop choice conditions.
We will create the following structure: // // h (fall through) From cab5b3f6f1ea7ee471e438fa06994240dadc6c85 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 7 Sep 2022 17:28:45 +0200 Subject: [PATCH 06/10] Readd some code, add function headers --- src/coreclr/jit/loopcloning.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index d5d7a48fabcbc9..da895eab7f726f 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1671,6 +1671,10 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; + case LcOptInfo::LcTypeTest: + case LcOptInfo::LcMethodAddrTest: + // We could optimize here. For now, let downstream opts clean this up. + break; default: break; @@ -2921,11 +2925,33 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } +//---------------------------------------------------------------------------- +// optIsHandleOrIndirOfHandle: +// Check if a tree is a specified handle type or indirection of that handle type. +// +// Arguments: +// tree - the tree +// handleType - the type of handle to check for +// +// Returns: +// True if the tree is such a handle. +// bool Compiler::optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType) { return tree->OperIs(GT_IND) ? tree->AsIndir()->Addr()->IsIconHandle(handleType) : tree->IsIconHandle(handleType); } +//---------------------------------------------------------------------------- +// optCheckLoopCloningGDVTestProfitable: +// Check heuristically if doing loop cloning for a GDV test is profitable. +// +// Arguments: +// guard - the GDV test +// info - info for the cloning we are doing +// +// Returns: +// True if cloning is considered profitable. 
+// bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info) { JITDUMP("Checking whether cloning is profitable ...\n"); From 9a532628a30adf606e6d00ccf5d83b0b5e58285d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 9 Sep 2022 22:52:48 +0200 Subject: [PATCH 07/10] Print in base 10 --- src/coreclr/jit/loopcloning.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 18d997770e60f8..4c9a8465c6d2e9 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -548,9 +548,13 @@ struct LC_Ident break; case IndirOfLocal: if (indirOffs != 0) - printf("*(V%02u + %x)", lclNum, indirOffs); + { + printf("*(V%02u + %u)", lclNum, indirOffs); + } else + { printf("*V%02u", lclNum); + } break; case ClassHandle: printf("%p", clsHnd); From 03cdacbb3fdcbe0aab8ab42ecc4b3a22049e5e97 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 14 Sep 2022 13:15:22 +0200 Subject: [PATCH 08/10] Remove reuse temp logic, small cleanups --- src/coreclr/jit/indirectcalltransformer.cpp | 75 +-------------------- src/coreclr/jit/loopcloning.cpp | 17 +++-- src/coreclr/jit/loopcloning.h | 20 +++--- 3 files changed, 20 insertions(+), 92 deletions(-) diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index fa90082a148bff..245e753b86caae 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -450,11 +450,9 @@ class IndirectCallTransformer class GuardedDevirtualizationTransformer final : public Transformer { - unsigned m_targetLclNum; - public: GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) - : Transformer(compiler, block, stmt), m_targetLclNum(BAD_VAR_NUM), returnTemp(BAD_VAR_NUM) + : Transformer(compiler, block, stmt), returnTemp(BAD_VAR_NUM) { } @@ -591,25 +589,10 @@ class 
IndirectCallTransformer // which case the check will be moved into the success case of // a previous GDV and thus may not execute when we hit the cold // path. - // TODO-GDV: Consider duplicating the store at the end of the - // cold case for the previous GDV. Then we can reuse the target - // if the second check of a chained GDV fails. - bool reuseTarget = false; //(origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0; if (origCall->IsVirtualVtable()) { GenTree* tarTree = compiler->fgExpandVirtualVtableCallTarget(origCall); - if (reuseTarget) - { - m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); - - GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); - Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); - - tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - } - CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; CORINFO_CONST_LOOKUP lookup; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &lookup); @@ -619,20 +602,6 @@ class IndirectCallTransformer } else { - // Reusing the call target for delegates is more - // complicated. Essentially we need to do the - // transformation done in LowerDelegateInvoke by converting - // the call to CT_INDIRECT and reusing the target address. - // We will do that transformation in CreateElse, but here - // we need to stash the target. - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - // Not impossible to support, but would additionally - // require us to load the wrapper delegate cell when - // expanding. 
- reuseTarget &= (origCall->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) == 0; -#endif - GenTree* offset = compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateFirstTarget, TYP_I_IMPL); @@ -640,16 +609,6 @@ class IndirectCallTransformer tarTree = compiler->gtNewIndir(TYP_I_IMPL, tarTree); tarTree->gtFlags |= GTF_IND_INVARIANT; - if (reuseTarget) - { - m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); - - GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); - Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); - tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - } - CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; CORINFO_CONST_LOOKUP lookup; compiler->info.compCompHnd->getFunctionFixedEntryPoint(methHnd, false, &lookup); @@ -939,38 +898,6 @@ class IndirectCallTransformer newStmt->SetRootNode(assign); } - if (m_targetLclNum != BAD_VAR_NUM) - { - if (call->IsVirtualVtable()) - { - // We already loaded the target once for the check, so reuse it from the temp. - call->gtControlExpr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - call->SetExpandedEarly(); - } - else if (call->IsDelegateInvoke()) - { - // Target was saved into a temp during check. We expand the - // delegate call to a CT_INDIRECT call that uses the target - // directly, somewhat similarly to LowerDelegateInvoke. 
- call->gtCallType = CT_INDIRECT; - call->gtCallAddr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - call->gtCallCookie = nullptr; - call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV; - - GenTree* thisOffset = - compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL); - CallArg* thisArg = call->gtArgs.GetThisArg(); - GenTree* delegateObj = thisArg->GetNode(); - - assert(delegateObj->OperIsLocal()); - GenTree* newThis = - compiler->gtNewOperNode(GT_ADD, TYP_BYREF, compiler->gtCloneExpr(delegateObj), thisOffset); - newThis = compiler->gtNewIndir(TYP_REF, newThis); - - thisArg->SetEarlyNode(newThis); - } - } - compiler->fgInsertStmtAtEnd(elseBlock, newStmt); // Set the original statement to a nop. diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index da895eab7f726f..a081634ca81b5b 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1111,9 +1111,14 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Ident::CreateIndirOfLocal(test->delegateLclNum, eeGetEEInfo()->offsetOfDelegateFirstTarget); LC_Ident methAddr; if (test->isSlot) + { methAddr = LC_Ident::CreateIndirMethodAddrSlot(test->methAddr DEBUG_ARG(test->targetMethHnd)); + } else + { methAddr = LC_Ident::CreateMethodAddr(test->methAddr DEBUG_ARG(test->targetMethHnd)); + } + LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); context->EnsureObjDerefs(loopNum)->Push(objDeref); @@ -2831,7 +2836,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop assert(compCurBB->lastStmt() == info->stmt); info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd, compCurBB)); + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); } } else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR)) @@ -2913,11 +2918,11 @@ Compiler::fgWalkResult 
Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop relopOp2->IsIconHandle() ? relopOp2->AsIntCon() : relopOp2->AsIndir()->Addr()->AsIntCon(); assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR)); assert(compCurBB->lastStmt() == info->stmt); - info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) - LcMethodAddrTestOptInfo(lclNum, (void*)iconHandle->IconValue(), relopOp2 != iconHandle, - compCurBB DEBUG_ARG( - (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle))); + LcMethodAddrTestOptInfo* optInfo = new (this, CMK_LoopOpt) + LcMethodAddrTestOptInfo(lclNum, (void*)iconHandle->IconValue(), + relopOp2 != iconHandle DEBUG_ARG( + (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); + info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); } } } diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 4c9a8465c6d2e9..f22a0ca9522346 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -324,10 +324,9 @@ struct LcTypeTestOptInfo : public LcOptInfo unsigned lclNum; // handle being tested for CORINFO_CLASS_HANDLE clsHnd; - BasicBlock* guardBlock; - LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd, BasicBlock* guardBlock) - : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd), guardBlock(guardBlock) + LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) + : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) { } }; @@ -337,22 +336,19 @@ struct LcMethodAddrTestOptInfo : public LcOptInfo // Invariant local whose target field(s) are tested unsigned delegateLclNum; // Invariant tree representing method address on the other side of the test - void* methAddr; - bool isSlot; - BasicBlock* guardBlock; + void* methAddr; + bool isSlot; #ifdef DEBUG CORINFO_METHOD_HANDLE targetMethHnd; #endif - LcMethodAddrTestOptInfo(unsigned delegateLclNum, - void* methAddr, - bool isSlot, - BasicBlock* guardBlock DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd)) + 
LcMethodAddrTestOptInfo(unsigned delegateLclNum, + void* methAddr, + bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd)) : LcOptInfo(LcMethodAddrTest) , delegateLclNum(delegateLclNum) , methAddr(methAddr) - , isSlot(isSlot) - , guardBlock(guardBlock) DEBUG_ARG(targetMethHnd(targetMethHnd)) + , isSlot(isSlot) DEBUG_ARG(targetMethHnd(targetMethHnd)) { } }; From b9ca3db66fdc24b977086f2fd181fd1ef7b8e634 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 19 Oct 2022 16:58:42 +0200 Subject: [PATCH 09/10] Remove exceptions on indirs in guards after loop cloning This prevents RBO from optimizing them away if the loop is inside an EH handler (e.g. foreach loop). --- src/coreclr/jit/loopcloning.cpp | 33 +++++++++++++++++++++++++++------ src/coreclr/jit/loopcloning.h | 20 ++++++++++++++++---- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index bd803403d92d99..822b71e30c8f3a 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1678,8 +1678,29 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* break; case LcOptInfo::LcTypeTest: case LcOptInfo::LcMethodAddrTest: - // We could optimize here. For now, let downstream opts clean this up. 
+ { + Statement* stmt; + GenTreeIndir* indir; + + if (optInfo->GetOptType() == LcOptInfo::LcTypeTest) + { + LcTypeTestOptInfo* typeTestInfo = optInfo->AsLcTypeTestOptInfo(); + stmt = typeTestInfo->stmt; + indir = typeTestInfo->methodTableIndir; + } + else + { + LcMethodAddrTestOptInfo* methodTestInfo = optInfo->AsLcMethodAddrTestOptInfo(); + stmt = methodTestInfo->stmt; + indir = methodTestInfo->delegateAddressIndir; + } + + indir->gtFlags |= GTF_ORDER_SIDEEFF | GTF_IND_NONFAULTING; + indir->gtFlags &= ~GTF_EXCEPT; + assert(!fgStmtListThreaded); + gtUpdateStmtSideEffects(stmt); break; + } default: break; @@ -2795,11 +2816,13 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_CONTINUE; } + GenTreeIndir* indir = relopOp1->AsIndir(); + GenTree* indirAddr = indir->Addr(); + if (relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) { // The indir addr must be loop invariant TYP_REF local // - GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); if (!indirAddr->TypeIs(TYP_REF)) { @@ -2836,13 +2859,11 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop assert(compCurBB->lastStmt() == info->stmt); info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd)); } } else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR)) { - GenTree* indirAddr = relopOp1->AsIndir()->Addr(); - // ▌ JTRUE void // └──▌ NE int // ├──▌ CNS_INT(h) long 0x7ffdb1fa4a08 ftn @@ -2919,7 +2940,7 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR)); assert(compCurBB->lastStmt() == info->stmt); LcMethodAddrTestOptInfo* optInfo = new (this, CMK_LoopOpt) - LcMethodAddrTestOptInfo(lclNum, (void*)iconHandle->IconValue(), + LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(), 
relopOp2 != iconHandle DEBUG_ARG( (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index f22a0ca9522346..9ab83531d0f522 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -320,19 +320,27 @@ struct LcJaggedArrayOptInfo : public LcOptInfo // struct LcTypeTestOptInfo : public LcOptInfo { + // statement where the opportunity occurs + Statement* stmt; + // indir for the method table + GenTreeIndir* methodTableIndir; // local whose method table is tested unsigned lclNum; // handle being tested for CORINFO_CLASS_HANDLE clsHnd; - LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) - : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + LcTypeTestOptInfo(Statement* stmt, GenTreeIndir* methodTableIndir, unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) + : LcOptInfo(LcTypeTest), stmt(stmt), methodTableIndir(methodTableIndir), lclNum(lclNum), clsHnd(clsHnd) { } }; struct LcMethodAddrTestOptInfo : public LcOptInfo { + // statement where the opportunity occurs + Statement* stmt; + // indir on the delegate + GenTreeIndir* delegateAddressIndir; // Invariant local whose target field(s) are tested unsigned delegateLclNum; // Invariant tree representing method address on the other side of the test @@ -342,10 +350,14 @@ struct LcMethodAddrTestOptInfo : public LcOptInfo CORINFO_METHOD_HANDLE targetMethHnd; #endif - LcMethodAddrTestOptInfo(unsigned delegateLclNum, - void* methAddr, + LcMethodAddrTestOptInfo(Statement* stmt, + GenTreeIndir* delegateAddressIndir, + unsigned delegateLclNum, + void* methAddr, bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd)) : LcOptInfo(LcMethodAddrTest) + , stmt(stmt) + , delegateAddressIndir(delegateAddressIndir) , delegateLclNum(delegateLclNum) , methAddr(methAddr) , isSlot(isSlot) DEBUG_ARG(targetMethHnd(targetMethHnd)) From 
a569adfa521a02ea31eb69ec369315ef636c3319 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 19 Oct 2022 20:45:52 +0200 Subject: [PATCH 10/10] Add some JitDump --- src/coreclr/jit/loopcloning.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 822b71e30c8f3a..d99e25ac45c1a8 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1695,10 +1695,17 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* indir = methodTestInfo->delegateAddressIndir; } + JITDUMP("Updating flags on GDV guard inside hot loop. Before:\n"); + DISPSTMT(stmt); + indir->gtFlags |= GTF_ORDER_SIDEEFF | GTF_IND_NONFAULTING; indir->gtFlags &= ~GTF_EXCEPT; assert(!fgStmtListThreaded); gtUpdateStmtSideEffects(stmt); + + JITDUMP("After:\n"); + DISPSTMT(stmt); + break; }