diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 88152601a75647..83bcd1e5dfe694 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -4212,6 +4212,10 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const return EA_16BYTE; } + case INS_vbroadcastf32x8: + case INS_vbroadcasti32x8: + case INS_vbroadcasti64x4: + case INS_vbroadcastf64x4: case INS_vextractf32x8: case INS_vextracti32x8: case INS_vextractf64x4: diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1cbf6d269bb5b8..bc1d3481cff06c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -30706,7 +30706,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) case NI_Vector256_ToVector512: { - assert(retType == TYP_SIMD32); + assert(retType == TYP_SIMD64); assert(cnsNode->gtType == TYP_SIMD32); cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 1363592e41df87..3f839c4ae8ec6f 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1640,6 +1640,32 @@ struct GenTree bool OperIsHWIntrinsic(NamedIntrinsic intrinsicId) const; + bool OperIsConvertMaskToVector() const + { +#if defined(FEATURE_HW_INTRINSICS) +#if defined(TARGET_XARCH) + return OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector); +#elif defined(TARGET_ARM64) + return OperIsHWIntrinsic(NI_Sve_ConvertMaskToVector); +#endif // !TARGET_XARCH && !TARGET_ARM64 +#else + return false; +#endif // FEATURE_HW_INTRINSICS + } + + bool OperIsConvertVectorToMask() const + { +#if defined(FEATURE_HW_INTRINSICS) +#if defined(TARGET_XARCH) + return OperIsHWIntrinsic(NI_EVEX_ConvertVectorToMask); +#elif defined(TARGET_ARM64) + return OperIsHWIntrinsic(NI_Sve_ConvertVectorToMask); +#endif // !TARGET_XARCH && !TARGET_ARM64 +#else + return false; +#endif // FEATURE_HW_INTRINSICS + } + // This is here for cleaner GT_LONG #ifdefs. static bool OperIsLong(genTreeOps gtOper) { @@ -6499,28 +6525,6 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsBitwiseHWIntrinsic() const; bool OperIsEmbRoundingEnabled() const; - bool OperIsConvertMaskToVector() const - { -#if defined(TARGET_XARCH) - return GetHWIntrinsicId() == NI_EVEX_ConvertMaskToVector; -#elif defined(TARGET_ARM64) - return GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector; -#else - return false; -#endif // TARGET_ARM64 && FEATURE_MASKED_HW_INTRINSICS - } - - bool OperIsConvertVectorToMask() const - { -#if defined(TARGET_XARCH) - return GetHWIntrinsicId() == NI_EVEX_ConvertVectorToMask; -#elif defined(TARGET_ARM64) - return GetHWIntrinsicId() == NI_Sve_ConvertVectorToMask; -#else - return false; -#endif - } - bool OperRequiresAsgFlag() const; bool OperRequiresCallFlag() const; bool OperRequiresGlobRefFlag() const; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index ca37c3a951c59a..1ebfc98d173180 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1264,9 +1264,10 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) if (isContainableMemory || !op2->OperIsConst()) { - unsigned simdSize = node->GetSimdSize(); - var_types simdBaseType = node->GetSimdBaseType(); - var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + unsigned simdSize = node->GetSimdSize(); + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + var_types simdBaseType = node->GetSimdBaseType(); + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); // We're either already loading from memory or we need to since // we don't know what actual index is going to be retrieved. @@ -1355,7 +1356,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } // Finally we can indirect the memory address to get the actual value - GenTreeIndir* indir = comp->gtNewIndir(simdBaseType, addr); + GenTreeIndir* indir = comp->gtNewIndir(JITtype2varType(simdBaseJitType), addr); BlockRange().InsertBefore(node, indir); LIR::Use use; @@ -2339,7 +2340,8 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) MakeSrcContained(indirNode, addr); } } - else if (addr->OperIs(GT_LCL_ADDR) && IsContainableLclAddr(addr->AsLclFld(), indirNode->Size())) + else if (addr->OperIs(GT_LCL_ADDR) && !indirNode->OperIs(GT_NULLCHECK) && + IsContainableLclAddr(addr->AsLclFld(), indirNode->Size())) { // These nodes go into an addr mode: // - GT_LCL_ADDR is a stack addr mode. diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 97800d5d10ddb8..73fbcb7af66f01 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -3063,7 +3063,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) // Next, determine if the target architecture supports BlendVariable NamedIntrinsic blendVariableId = NI_Illegal; - bool isOp1CvtMaskToVector = op1->AsHWIntrinsic()->OperIsConvertMaskToVector(); + bool isOp1CvtMaskToVector = op1->OperIsConvertMaskToVector(); if ((simdSize == 64) || isOp1CvtMaskToVector) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7ce106b810c999..0af8e7fef65989 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9977,7 +9977,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) // We need both operands to be ConvertMaskToVector in // order to optimize this to a direct mask operation - if (!op1->OperIsHWIntrinsic()) + if (!op1->OperIsConvertMaskToVector()) { break; } @@ -10003,11 +10003,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); - if (!cvtOp1->OperIsConvertMaskToVector()) - { - break; - } - if (!cvtOp2->OperIsConvertMaskToVector()) { break; @@ -10448,7 +10443,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* tree) { return nullptr; } - assert(intrinOp1->GetHWIntrinsicId() == intrinsicId); if (needsMatchingBaseType && (intrinOp1->GetSimdBaseType() != simdBaseType)) { diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 0d2e03d4248790..aa4bd68dcd0dda 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -372,6 +372,26 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackcompOpportunisticallyDependsOn(InstructionSet_SSE41_X64)) + { + break; + } + } + else if (!varTypeIsShort(simdBaseType)) + { + if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + break; + } + } + } +#endif // TARGET_XARCH + result = comp->gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize); break; }