diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ce92e74c0175c2..5032b348b793b0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -29442,6 +29442,92 @@ bool GenTree::IsInvariant() const return OperIsConst() || OperIs(GT_LCL_ADDR) || OperIs(GT_FTN_ADDR); } +//------------------------------------------------------------------- +// IsVectorPerElementMask: returns true if this node is a vector constant per-element mask +// (every element has either all bits set or none of them). +// +// Arguments: +// simdBaseType - the base type of the constant being checked. +// simdSize - the size of the SIMD type of the intrinsic. +// +// Returns: +// True if this node is a vector constant per-element mask. +// +bool GenTree::IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const +{ +#ifdef FEATURE_SIMD + if (IsCnsVec()) + { + const GenTreeVecCon* vecCon = AsVecCon(); + + int elementCount = vecCon->ElementCount(simdSize, simdBaseType); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u8[0], elementCount); + case TYP_SHORT: + case TYP_USHORT: + return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u16[0], elementCount); + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u32[0], elementCount); + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u64[0], elementCount); + default: + unreached(); + } + } + else if (OperIsHWIntrinsic()) + { + const GenTreeHWIntrinsic* intrinsic = AsHWIntrinsic(); + const NamedIntrinsic intrinsicId = intrinsic->GetHWIntrinsicId(); + + if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsicId)) + { + // We directly return a per-element mask + return true; + } + + bool isScalar = false; + genTreeOps oper = intrinsic->HWOperGet(&isScalar); + + switch (oper) + { + case GT_AND: + case GT_AND_NOT: + case GT_OR: 
+ case GT_XOR: + { + // We are a binary bitwise operation where both inputs are per-element masks + return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize) && + intrinsic->Op(2)->IsVectorPerElementMask(simdBaseType, simdSize); + } + + case GT_NOT: + { + // We are an unary bitwise operation where the input is a per-element mask + return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize); + } + + default: + { + assert(!GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)); + break; + } + } + + return false; + } +#endif // FEATURE_SIMD + + return false; +} + //------------------------------------------------------------------------ // IsNeverNegative: returns true if the given tree is known to be never // negative, i. e. the upper bit will always be zero. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index df0b8d73d5ea23..4dd767887f35eb 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -2317,6 +2317,7 @@ struct GenTree bool Precedes(GenTree* other); bool IsInvariant() const; + bool IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const; bool IsNeverNegative(Compiler* comp) const; bool IsNeverNegativeOne(Compiler* comp) const; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 514f3ea3e69e4d..0f459168e01fab 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2994,13 +2994,13 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) GenTree* op3 = node->Op(3); // If the condition vector comes from a hardware intrinsic that - // returns a per-element mask (marked with HW_Flag_ReturnsPerElementMask), - // we can optimize the entire conditional select to - // a single BlendVariable instruction (if supported by the architecture) + // returns a per-element mask, we can optimize the entire + // conditional select to a single BlendVariable instruction + // (if supported by the architecture) // TODO-XARCH-AVX512 
Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial. // First, determine if the condition is a per-element mask - if (op1->OperIsHWIntrinsic() && HWIntrinsicInfo::ReturnsPerElementMask(op1->AsHWIntrinsic()->GetHWIntrinsicId())) + if (op1->IsVectorPerElementMask(simdBaseType, simdSize)) { // Next, determine if the target architecture supports BlendVariable NamedIntrinsic blendVariableId = NI_Illegal; diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 72d710b4aea69e..dfc12ef64ef524 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -15,6 +15,17 @@ static bool ElementsAreSame(T* array, size_t size) return true; } +template <typename T> +static bool ElementsAreAllBitsSetOrZero(T* array, size_t size) +{ + for (size_t i = 0; i < size; i++) + { + if (array[i] != static_cast<T>(0) && array[i] != static_cast<T>(~0)) + return false; + } + return true; +} + struct simd8_t { union