Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29442,6 +29442,92 @@ bool GenTree::IsInvariant() const
return OperIsConst() || OperIs(GT_LCL_ADDR) || OperIs(GT_FTN_ADDR);
}

//-------------------------------------------------------------------
// IsVectorPerElementMask: determines whether this node produces a vector in which
//    each element is a mask, that is, each element is either zero or has all bits set.
//
// Arguments:
//    simdBaseType - the element type used to split a vector constant into elements.
//    simdSize     - the size of the SIMD type, used to compute the element count.
//
// Returns:
//    True if this node is a vector constant whose elements are all either zero or
//    all-bits-set, a hardware intrinsic reported by HWIntrinsicInfo::ReturnsPerElementMask,
//    or a bitwise AND/AND_NOT/OR/XOR/NOT hardware intrinsic whose inputs satisfy this
//    same check. False otherwise, and always false when FEATURE_SIMD is not defined.
//
bool GenTree::IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const
{
#ifdef FEATURE_SIMD
    if (IsCnsVec())
    {
        const GenTreeVecCon* cnsNode = AsVecCon();
        const int            count   = cnsNode->ElementCount(simdSize, simdBaseType);

        // Pick the view of the constant whose element width matches the base type,
        // then test each element individually.
        switch (simdBaseType)
        {
            case TYP_BYTE:
            case TYP_UBYTE:
            {
                return ElementsAreAllBitsSetOrZero(&cnsNode->gtSimdVal.u8[0], count);
            }

            case TYP_SHORT:
            case TYP_USHORT:
            {
                return ElementsAreAllBitsSetOrZero(&cnsNode->gtSimdVal.u16[0], count);
            }

            case TYP_INT:
            case TYP_UINT:
            case TYP_FLOAT:
            {
                return ElementsAreAllBitsSetOrZero(&cnsNode->gtSimdVal.u32[0], count);
            }

            case TYP_LONG:
            case TYP_ULONG:
            case TYP_DOUBLE:
            {
                return ElementsAreAllBitsSetOrZero(&cnsNode->gtSimdVal.u64[0], count);
            }

            default:
            {
                unreached();
            }
        }
    }
    else if (OperIsHWIntrinsic())
    {
        const GenTreeHWIntrinsic* hwNode = AsHWIntrinsic();

        if (HWIntrinsicInfo::ReturnsPerElementMask(hwNode->GetHWIntrinsicId()))
        {
            // The intrinsic itself is flagged as producing a per-element mask
            return true;
        }

        bool             scalarOper = false;
        const genTreeOps hwOper     = hwNode->HWOperGet(&scalarOper);

        switch (hwOper)
        {
            case GT_NOT:
            {
                // Unary bitwise operation: the result is a mask if the single input is one
                return hwNode->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize);
            }

            case GT_AND:
            case GT_AND_NOT:
            case GT_OR:
            case GT_XOR:
            {
                // Binary bitwise operation: the result is a mask only if both inputs are masks
                if (!hwNode->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize))
                {
                    return false;
                }

                return hwNode->Op(2)->IsVectorPerElementMask(simdBaseType, simdSize);
            }

            default:
            {
                // Every bitwise operation is expected to have been handled above
                assert(!GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(hwOper));
                return false;
            }
        }
    }
#endif // FEATURE_SIMD

    return false;
}

//------------------------------------------------------------------------
// IsNeverNegative: returns true if the given tree is known to be never
// negative, i.e. the upper bit will always be zero.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,7 @@ struct GenTree
bool Precedes(GenTree* other);

bool IsInvariant() const;
bool IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const;

bool IsNeverNegative(Compiler* comp) const;
bool IsNeverNegativeOne(Compiler* comp) const;
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2994,13 +2994,13 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
GenTree* op3 = node->Op(3);

// If the condition vector comes from a hardware intrinsic that
// returns a per-element mask (marked with HW_Flag_ReturnsPerElementMask),
// we can optimize the entire conditional select to
// a single BlendVariable instruction (if supported by the architecture)
// returns a per-element mask, we can optimize the entire
// conditional select to a single BlendVariable instruction
// (if supported by the architecture)

// TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial.
// First, determine if the condition is a per-element mask
if (op1->OperIsHWIntrinsic() && HWIntrinsicInfo::ReturnsPerElementMask(op1->AsHWIntrinsic()->GetHWIntrinsicId()))
if (op1->IsVectorPerElementMask(simdBaseType, simdSize))
{
// Next, determine if the target architecture supports BlendVariable
NamedIntrinsic blendVariableId = NI_Illegal;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ static bool ElementsAreSame(T* array, size_t size)
return true;
}

//------------------------------------------------------------------------
// ElementsAreAllBitsSetOrZero: checks whether every inspected element of an
//    array is either zero or has all of its bits set.
//
// Arguments:
//    array - pointer to the first element to inspect.
//    size  - the number of elements to inspect.
//
// Returns:
//    True if each of the first `size` elements compares equal to
//    static_cast<T>(0) or static_cast<T>(~0). Also true when size is zero,
//    since no element is inspected.
//
template <typename T>
static bool ElementsAreAllBitsSetOrZero(T* array, size_t size)
{
    size_t index = 0;

    while (index < size)
    {
        T& element = array[index];
        index++;

        const bool isNonZero       = (element != static_cast<T>(0));
        const bool isNotAllBitsSet = (element != static_cast<T>(~0));

        if (isNonZero && isNotAllBitsSet)
        {
            // This element is neither of the two permitted values
            return false;
        }
    }

    return true;
}

struct simd8_t
{
union
Expand Down