diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp
index 3a3139db4a1577..2e5b463c63d8c8 100644
--- a/src/coreclr/jit/assertionprop.cpp
+++ b/src/coreclr/jit/assertionprop.cpp
@@ -4848,6 +4848,52 @@ AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTree*
     return NO_ASSERTION_INDEX;
 }
 
+//------------------------------------------------------------------------
+// optAssertionVNIsNonNull: See if we can prove that the value of a VN is
+// non-null using assertions.
+//
+// Arguments:
+//    vn         - VN to check
+//    assertions - set of live assertions
+//
+// Return Value:
+//    True if the VN could be proven non-null.
+//
+bool Compiler::optAssertionVNIsNonNull(ValueNum vn, ASSERT_VALARG_TP assertions)
+{
+    if (vnStore->IsKnownNonNull(vn))
+    {
+        return true;
+    }
+
+    // Check each assertion to find if we have a "vn != null" assertion.
+    //
+    BitVecOps::Iter iter(apTraits, assertions);
+    unsigned        index = 0;
+    while (iter.NextElem(&index))
+    {
+        AssertionIndex assertionIndex = GetAssertionIndex(index);
+        if (assertionIndex > optAssertionCount)
+        {
+            break;
+        }
+
+        AssertionDsc* curAssertion = optGetAssertion(assertionIndex);
+        if (!curAssertion->CanPropNonNull())
+        {
+            continue;
+        }
+
+        if (curAssertion->op1.vn != vn)
+        {
+            continue;
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
 /*****************************************************************************
  *
  *  Given a tree consisting of a call and a set of available assertions, we
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 08307977ac6920..b92664bc0652de 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -8025,6 +8025,7 @@ class Compiler
     AssertionIndex optAssertionIsSubrange(GenTree* tree, IntegralRange range, ASSERT_VALARG_TP assertions);
     AssertionIndex optAssertionIsSubtype(GenTree* tree, GenTree* methodTableArg, ASSERT_VALARG_TP assertions);
     AssertionIndex optAssertionIsNonNullInternal(GenTree* op, ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased));
+    bool optAssertionVNIsNonNull(ValueNum vn, ASSERT_VALARG_TP assertions);
     bool optAssertionIsNonNull(GenTree* op,
                                ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));
 
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index bdbece4cc3614e..ae8b9c62ebb964 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -5,29 +5,46 @@
 // scalar evolution analysis (see scev.h and scev.cpp for more information
 // about the scalar evolution analysis).
 //
-// Currently the only optimization done is widening of primary induction
-// variables from 32 bits into 64 bits. This is generally only profitable on
-// x64 that does not allow zero extension of 32-bit values in addressing modes
-// (in contrast, arm64 does have the capability of including zero extensions in
-// addressing modes). For x64 this saves a zero extension for every array
-// access inside the loop, in exchange for some widening or narrowing stores
-// outside the loop:
-// - To make sure the new widened IV starts at the right value it is
-//   initialized to the value of the narrow IV outside the loop (either in the
-//   preheader or at the def location of the narrow IV). Usually the start
-//   value is a constant, in which case the widened IV is just initialized to
-//   the constant value.
-// - If the narrow IV is used after the loop we need to store it back from
-//   the widened IV in the exits. We depend on liveness sets to figure out
-//   which exits to insert IR into.
-//
-// These steps ensure that the wide IV has the right value to begin with and
-// the old narrow IV still has the right value after the loop. Additionally,
-// we must replace every use of the narrow IV inside the loop with the widened
-// IV. This is done by a traversal of the IR inside the loop. We do not
-// actually widen the uses of the IV; rather, we keep all uses and defs as
-// 32-bit, which the backend is able to handle efficiently on x64. Because of
-// this we do not need to worry about overflow.
+// Currently the following optimizations are done:
+//
+// IV widening:
+// This widens primary induction variables from 32 bits into 64 bits. This is
+// generally only profitable on x64, which does not allow zero extension of
+// 32-bit values in addressing modes (in contrast, arm64 does have the
+// capability of including zero extensions in addressing modes). For x64 this
+// saves a zero extension for every array access inside the loop, in exchange
+// for some widening or narrowing stores outside the loop:
+// - To make sure the new widened IV starts at the right value it is
+//   initialized to the value of the narrow IV outside the loop (either in
+//   the preheader or at the def location of the narrow IV). Usually the
+//   start value is a constant, in which case the widened IV is just
+//   initialized to the constant value.
+// - If the narrow IV is used after the loop we need to store it back from
+//   the widened IV in the exits. We depend on liveness sets to figure out
+//   which exits to insert IR into.
+//
+// These steps ensure that the wide IV has the right value to begin with and
+// the old narrow IV still has the right value after the loop. Additionally,
+// we must replace every use of the narrow IV inside the loop with the widened
+// IV. This is done by a traversal of the IR inside the loop. We do not
+// actually widen the uses of the IV; rather, we keep all uses and defs as
+// 32-bit, which the backend is able to handle efficiently on x64. Because of
+// this we do not need to worry about overflow.
+//
+// Loop reversing:
+// This converts loops that are up-counted into loops that are down-counted.
+// Down-counted loops can generally do their IV update and compare in a
+// single instruction, bypassing the need to do a separate comparison with a
+// bound.
+//
+// Strength reduction (disabled):
+// This changes the stride of primary IVs in a loop to avoid more expensive
+// multiplications inside the loop. Commonly the primary IVs are only used
+// for indexing memory at some element size, which can end up introducing
+// such multiplications.
+//
+// Strength reduction frequently relies on reversing the loop to remove the
+// last non-multiplied use of the primary IV.
 //
 
 #include "jitpch.h"
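For illustration, the loop reversal described in the comment above corresponds to the following source-level rewrite (a plain C++ sketch with invented names; the JIT performs this on IR, and the single-instruction claim is the motivation stated in the comment):

    // Up-counted: the backedge needs an increment, a compare against 'n', and a branch.
    void ZeroUpCounted(int* arr, int n)
    {
        for (int i = 0; i < n; i++)
        {
            arr[i] = 0;
        }
    }

    // Down-counted: the decrement itself sets the flags, so the backedge can be
    // a single decrement-and-branch with no separate compare against a bound.
    void ZeroDownCounted(int* arr, int n)
    {
        for (int i = n; i != 0; i--)
        {
            arr[i - 1] = 0;
        }
    }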
@@ -1227,6 +1244,7 @@ class StrengthReductionContext
     bool InitializeCursors(GenTreeLclVarCommon* primaryIVLcl, ScevAddRec* primaryIV);
     void AdvanceCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* nextCursors);
     bool CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, int derivedLevel, ScevAddRec** nextIV);
+    bool StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec);
     bool TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorInfo>* cursors, ScevAddRec* iv);
     BasicBlock* FindUpdateInsertionPoint(ArrayStack<CursorInfo>* cursors);
 
@@ -1344,13 +1362,11 @@ bool StrengthReductionContext::TryStrengthReduce()
         }
 
         assert(nextIV != nullptr);
 
-        // We need more sanity checks to allow materializing GC-typed add
-        // recs. Otherwise we may eagerly form a GC pointer that was only
-        // lazily formed under some conditions before, which can be
-        // illegal. For now we just bail.
-        if (varTypeIsGC(nextIV->Type))
+        if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV))
         {
-            JITDUMP("    Next IV has type %s. Bailing.\n", varTypeName(nextIV->Type));
+            JITDUMP(
+                "    Next IV computes a GC pointer that we cannot prove to be inside a managed object. Bailing.\n");
             break;
         }
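The guard above is needed because the strength-reduced IV for an array access is itself a GC pointer into the array. For illustration, a plain C++ sketch of the rewrite strength reduction performs (invented names; in the JIT the reduced IV is a TYP_BYREF, which a relocating GC can only handle while it points inside the array object):

    // Before: each access multiplies the index by the element size.
    int SumByIndex(const int* arr, int n)
    {
        int sum = 0;
        for (int i = 0; i < n; i++)
        {
            sum += arr[i]; // address = arr + (int64_t)i * sizeof(int)
        }
        return sum;
    }

    // After: the pointer advances by the element size, so no multiply remains.
    int SumByPointer(const int* arr, int n)
    {
        int        sum = 0;
        const int* p   = arr; // the new strength-reduced IV
        for (int i = 0; i < n; i++, p++)
        {
            sum += *p;
        }
        return sum;
    }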
Otherwise we may eagerly form a GC pointer that was only - // lazily formed under some conditions before, which can be - // illegal. For now we just bail. - if (varTypeIsGC(nextIV->Type)) + if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV)) { - JITDUMP(" Next IV has type %s. Bailing.\n", varTypeName(nextIV->Type)); + JITDUMP( + " Next IV computes a GC pointer that we cannot prove to be inside a managed object. Bailing.\n", + varTypeName(nextIV->Type)); break; } @@ -1694,6 +1710,127 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack* curs return *nextIV != nullptr; } +//------------------------------------------------------------------------ +// StaysWithinManagedObject: Check whether the specified GC-pointer add-rec can +// be guaranteed to be inside the same managed object for the whole loop. +// +// Parameters: +// cursors - Cursors pointing to next uses that correspond to the specific add-rec. +// addRec - The add recurrence +// +// Returns: +// True if we were able to prove so. +// +bool StrengthReductionContext::StaysWithinManagedObject(ArrayStack* cursors, ScevAddRec* addRec) +{ + int64_t offset; + Scev* baseScev = addRec->Start->PeelAdditions(&offset); + offset = static_cast(offset); + + // We only support arrays here. To strength reduce Span accesses we need + // additional properies on the range designated by a Span that we + // currently do not specify, or we need to prove that the byref we may form + // in the IV update would have been formed anyway by the loop. + if (!baseScev->OperIs(ScevOper::Local) || !baseScev->TypeIs(TYP_REF)) + { + return false; + } + + // Now use the fact that we keep ARR_ADDRs in the IR when we have array + // accesses. + GenTreeArrAddr* arrAddr = nullptr; + for (int i = 0; i < cursors->Height(); i++) + { + CursorInfo& cursor = cursors->BottomRef(i); + GenTree* parent = cursor.Tree->gtGetParent(nullptr); + if ((parent != nullptr) && parent->OperIs(GT_ARR_ADDR)) + { + arrAddr = parent->AsArrAddr(); + break; + } + } + + if (arrAddr == nullptr) + { + return false; + } + + unsigned arrElemSize = arrAddr->GetElemType() == TYP_STRUCT + ? m_comp->typGetObjLayout(arrAddr->GetElemClassHandle())->GetSize() + : genTypeSize(arrAddr->GetElemType()); + + int64_t stepCns; + if (!addRec->Step->GetConstantValue(m_comp, &stepCns) || ((unsigned)stepCns > arrElemSize)) + { + return false; + } + + ScevLocal* local = (ScevLocal*)baseScev; + + ValueNum vn = m_scevContext.MaterializeVN(baseScev); + if (vn == ValueNumStore::NoVN) + { + return false; + } + + BasicBlock* preheader = m_loop->EntryEdge(0)->getSourceBlock(); + if (!m_comp->optAssertionVNIsNonNull(vn, preheader->bbAssertionOut)) + { + return false; + } + + // We have a non-null array. Check that the 'start' offset looks fine. + // TODO: We could also use assertions on the length of the array. E.g. if + // we know the length of the array is > 3, then we can allow the add rec to + // have a later start. Maybe range check can be used? + if ((offset < 0) || (offset > (int64_t)OFFSETOF__CORINFO_Array__data)) + { + return false; + } + + // Now see if we have a bound that guarantees that we iterate less than the + // array length's times. + for (int i = 0; i < m_backEdgeBounds.Height(); i++) + { + // TODO: EvaluateRelop ought to be powerful enough to prove something + // like bound < ARR_LENGTH(vn), but it is not able to prove that + // currently, even for bound = ARR_LENGTH(vn) - 1 (common case). 
 //------------------------------------------------------------------------
 // TryReplaceUsesWithNewPrimaryIV: Perform final sanity checks before
 // introducing a new primary IV and replacing the uses represented by the
diff --git a/src/coreclr/jit/scev.cpp b/src/coreclr/jit/scev.cpp
index 9053d47bbf55d0..617d3e0a428a67 100644
--- a/src/coreclr/jit/scev.cpp
+++ b/src/coreclr/jit/scev.cpp
@@ -220,6 +220,48 @@ bool Scev::IsInvariant()
     return result != ScevVisit::Abort;
 }
 
+//------------------------------------------------------------------------
+// Scev::PeelAdditions: Peel the additions from a SCEV and return the base
+// SCEV and the sum of the offsets peeled.
+//
+// Parameters:
+//   offset - [out] The sum of offsets peeled
+//
+// Returns:
+//   The base SCEV.
+//
+// Remarks:
+//   If the SCEV is 32 bits, the caller is expected to apply the proper
+//   truncation (or extension into 64 bits).
+//
+Scev* Scev::PeelAdditions(int64_t* offset)
+{
+    *offset = 0;
+
+    Scev* scev = this;
+    while (scev->OperIs(ScevOper::Add))
+    {
+        Scev* op1 = ((ScevBinop*)scev)->Op1;
+        Scev* op2 = ((ScevBinop*)scev)->Op2;
+        if (op1->OperIs(ScevOper::Constant))
+        {
+            *offset += ((ScevConstant*)op1)->Value;
+            scev = op2;
+        }
+        else if (op2->OperIs(ScevOper::Constant))
+        {
+            *offset += ((ScevConstant*)op2)->Value;
+            scev = op1;
+        }
+        else
+        {
+            break;
+        }
+    }
+
+    return scev;
+}
+
 //------------------------------------------------------------------------
 // Scev::Equals: Check if two SCEV trees are equal.
 //
diff --git a/src/coreclr/jit/scev.h b/src/coreclr/jit/scev.h
index e965137b90f1a9..fec05231ce34c8 100644
--- a/src/coreclr/jit/scev.h
+++ b/src/coreclr/jit/scev.h
@@ -75,6 +75,8 @@ struct Scev
 
     bool IsInvariant();
 
+    Scev* PeelAdditions(int64_t* offset);
+
     static bool Equals(Scev* left, Scev* right);
 };
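The peeling loop is self-contained enough to model outside the JIT; a minimal standalone sketch with a hypothetical Node type (not the JIT's Scev classes) shows the idea:

    #include <cstdint>

    // Hypothetical stand-in for the SCEV nodes used above.
    struct Node
    {
        bool        isAdd;   // binary addition node
        bool        isConst; // constant node
        int64_t     value;   // valid when isConst
        const Node* op1;     // valid when isAdd
        const Node* op2;     // valid when isAdd
    };

    // Strip "base + constant" shapes, summing the constants into *offset.
    const Node* PeelAdditions(const Node* n, int64_t* offset)
    {
        *offset = 0;
        while (n->isAdd)
        {
            if (n->op1->isConst)
            {
                *offset += n->op1->value;
                n = n->op2;
            }
            else if (n->op2->isConst)
            {
                *offset += n->op2->value;
                n = n->op1;
            }
            else
            {
                break; // neither operand is a constant; stop peeling
            }
        }
        return n;
    }

For example, peeling the tree for ((x + 8) + 4) returns the node for 'x' and sets *offset to 12, matching how StaysWithinManagedObject separates an add rec's base from its constant displacement.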