Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add Vector{128/256}.LoadUnsafe(ref char) and Vector128.ShuffleUnsafe
  • Loading branch information
MihaZupan committed Mar 8, 2023
commit 71e7406f4a73267192d39c7d89a6166c6ea13702
9 changes: 2 additions & 7 deletions src/libraries/Common/src/System/HexConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,8 @@ internal static (Vector128<byte>, Vector128<byte>) AsciiToHexVector128(Vector128
Vector128<byte> lowNibbles = Vector128.UnpackLow(shiftedSrc, src);
Vector128<byte> highNibbles = Vector128.UnpackHigh(shiftedSrc, src);

return (ShuffleUnsafe(hexMap, lowNibbles & Vector128.Create((byte)0xF)),
ShuffleUnsafe(hexMap, highNibbles & Vector128.Create((byte)0xF)));

// TODO: remove once https://github.com/dotnet/runtime/pull/80963 is merged
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static Vector128<byte> ShuffleUnsafe(Vector128<byte> value, Vector128<byte> mask)
=> Ssse3.IsSupported ? Ssse3.Shuffle(value, mask) : AdvSimd.Arm64.VectorTableLookup(value, mask);
return (Vector128.ShuffleUnsafe(hexMap, lowNibbles & Vector128.Create((byte)0xF)),
Vector128.ShuffleUnsafe(hexMap, highNibbles & Vector128.Create((byte)0xF)));
}

private static void EncodeToUtf16_Vector128(ReadOnlySpan<byte> bytes, Span<char> chars, Casing casing)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ private static bool EqualsIgnoreCase_Vector128(ref char charA, ref char charB, i
Vector128<ushort> vec2;
do
{
vec1 = Vector128.LoadUnsafe(ref Unsafe.As<char, ushort>(ref charA), i);
vec2 = Vector128.LoadUnsafe(ref Unsafe.As<char, ushort>(ref charB), i);
vec1 = Vector128.LoadUnsafe(ref charA, i);
vec2 = Vector128.LoadUnsafe(ref charB, i);

if (!Utf16Utility.AllCharsInVector128AreAscii(vec1 | vec2))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -860,10 +860,10 @@ private static Vector128<byte> IndexOfAnyLookupCore(Vector128<byte> source, Vect

// The bitmapLookup represents an 8x16 table of bits, indicating whether a character is present in the needle.
// Lookup the rows via the lower nibble and the column via the higher nibble.
Vector128<byte> bitMask = Shuffle(bitmapLookup, lowNibbles);
Vector128<byte> bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles);

// For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0.
Vector128<byte> bitPositions = Shuffle(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles);
Vector128<byte> bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles);

Vector128<byte> result = bitMask & bitPositions;
return result;
Expand Down Expand Up @@ -909,10 +909,10 @@ private static Vector128<byte> IndexOfAnyLookup<TNegator>(Vector128<byte> source
Vector128<byte> lowNibbles = source & Vector128.Create((byte)0xF);
Vector128<byte> highNibbles = Vector128.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector128.Create((byte)0xF);

Vector128<byte> row0 = Shuffle(bitmapLookup0, lowNibbles);
Vector128<byte> row1 = Shuffle(bitmapLookup1, lowNibbles);
Vector128<byte> row0 = Vector128.ShuffleUnsafe(bitmapLookup0, lowNibbles);
Vector128<byte> row1 = Vector128.ShuffleUnsafe(bitmapLookup1, lowNibbles);

Vector128<byte> bitmask = Shuffle(Vector128.Create(0x8040201008040201).AsByte(), highNibbles);
Vector128<byte> bitmask = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201).AsByte(), highNibbles);

Vector128<byte> mask = Vector128.GreaterThan(highNibbles.AsSByte(), Vector128.Create((sbyte)0x7)).AsByte();
Vector128<byte> bitsets = Vector128.ConditionalSelect(mask, row1, row0);
Expand Down Expand Up @@ -944,16 +944,6 @@ private static Vector256<byte> IndexOfAnyLookup<TNegator>(Vector256<byte> source
return TNegator.NegateIfNeeded(result);
}

/// <summary>Selects bytes from <paramref name="vector" /> using <paramref name="indices" /> via the hardware intrinsic available on the current platform.</summary>
/// <param name="vector">The table the bytes are selected from.</param>
/// <param name="indices">The per-element indices into <paramref name="vector" />.</param>
/// <returns>The result of the platform-specific shuffle / table-lookup / swizzle instruction.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
{
    // Vector128.Shuffle is deliberately avoided: the callers already account for,
    // and rely on, the differences in behavior between platforms.
    if (Ssse3.IsSupported)
    {
        return Ssse3.Shuffle(vector, indices);
    }

    if (AdvSimd.Arm64.IsSupported)
    {
        return AdvSimd.Arm64.VectorTableLookup(vector, indices);
    }

    // Neither x86 SSSE3 nor Arm64 AdvSimd: the remaining option is the WASM swizzle.
    return PackedSimd.Swizzle(vector, indices);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe int ComputeFirstIndex<T, TNegator>(ref T searchSpace, ref T current, Vector128<byte> result)
where TNegator : struct, INegator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector256<byte> ContainsMask32CharsAvx2(Vector256<byte> charMapLower, Vector256<byte> charMapUpper, ref char searchSpace)
{
Vector256<ushort> source0 = Vector256.LoadUnsafe(ref Unsafe.As<char, ushort>(ref searchSpace));
Vector256<ushort> source1 = Vector256.LoadUnsafe(ref Unsafe.As<char, ushort>(ref searchSpace), (nuint)Vector256<ushort>.Count);
Vector256<ushort> source0 = Vector256.LoadUnsafe(ref searchSpace);
Vector256<ushort> source1 = Vector256.LoadUnsafe(ref searchSpace, (nuint)Vector256<ushort>.Count);

Vector256<byte> sourceLower = Avx2.PackUnsignedSaturate(
(source0 & Vector256.Create((ushort)255)).AsInt16(),
Expand Down Expand Up @@ -144,8 +144,8 @@ private static Vector256<byte> IsCharBitNotSetAvx2(Vector256<byte> charMapLower,
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> ContainsMask16Chars(Vector128<byte> charMapLower, Vector128<byte> charMapUpper, ref char searchSpace)
{
Vector128<ushort> source0 = Vector128.LoadUnsafe(ref Unsafe.As<char, ushort>(ref searchSpace));
Vector128<ushort> source1 = Vector128.LoadUnsafe(ref Unsafe.As<char, ushort>(ref searchSpace), (nuint)Vector128<ushort>.Count);
Vector128<ushort> source0 = Vector128.LoadUnsafe(ref searchSpace);
Vector128<ushort> source1 = Vector128.LoadUnsafe(ref searchSpace, (nuint)Vector128<ushort>.Count);

Vector128<byte> sourceLower = Sse2.IsSupported
? Sse2.PackUnsignedSaturate((source0 & Vector128.Create((ushort)255)).AsInt16(), (source1 & Vector128.Create((ushort)255)).AsInt16())
Expand All @@ -168,26 +168,17 @@ private static Vector128<byte> IsCharBitNotSet(Vector128<byte> charMapLower, Vec
? Sse2.ShiftRightLogical(values.AsInt32(), VectorizedIndexShift).AsByte() & Vector128.Create((byte)15)
: AdvSimd.ShiftRightLogical(values, VectorizedIndexShift);

Vector128<byte> bitPositions = Shuffle(Vector128.Create(0x8040201008040201).AsByte(), highNibble);
Vector128<byte> bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201).AsByte(), highNibble);

Vector128<byte> index = values & Vector128.Create((byte)VectorizedIndexMask);
Vector128<byte> bitMaskLower = Shuffle(charMapLower, index);
Vector128<byte> bitMaskUpper = Shuffle(charMapUpper, index - Vector128.Create((byte)16));
Vector128<byte> bitMaskLower = Vector128.ShuffleUnsafe(charMapLower, index);
Vector128<byte> bitMaskUpper = Vector128.ShuffleUnsafe(charMapUpper, index - Vector128.Create((byte)16));
Vector128<byte> mask = Vector128.GreaterThan(index, Vector128.Create((byte)15));
Vector128<byte> bitMask = Vector128.ConditionalSelect(mask, bitMaskUpper, bitMaskLower);

return Vector128.Equals(bitMask & bitPositions, Vector128<byte>.Zero);
}

/// <summary>Selects bytes from <paramref name="vector" /> using <paramref name="indices" /> via the SSSE3 shuffle when available, otherwise the Arm64 table lookup.</summary>
/// <param name="vector">The table the bytes are selected from.</param>
/// <param name="indices">The per-element indices into <paramref name="vector" />.</param>
/// <returns>The result of the platform-specific instruction.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<byte> Shuffle(Vector128<byte> vector, Vector128<byte> indices)
{
    // Vector128.Shuffle is deliberately avoided: the callers already account for
    // the differences in behavior between platforms.
    if (Ssse3.IsSupported)
    {
        return Ssse3.Shuffle(vector, indices);
    }

    // No support check here: every non-SSSE3 caller is routed to the Arm64 path.
    return AdvSimd.Arm64.VectorTableLookup(vector, indices);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool ShouldUseSimpleLoop(int searchSpaceLength, int valuesLength)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1820,6 +1820,21 @@ public static Vector128<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
return Unsafe.ReadUnaligned<Vector128<T>>(ref Unsafe.As<T, byte>(ref source));
}

/// <summary>Reads a vector starting at <paramref name="source" />, viewing the <see cref="char"/> data as <see cref="ushort"/> elements.</summary>
/// <param name="source">The reference at which the load begins.</param>
/// <returns>The vector read from <paramref name="source" />.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ushort> LoadUnsafe(ref char source)
{
    // Reinterpret the reference so the call binds to the generic ushort load.
    ref ushort reinterpreted = ref Unsafe.As<char, ushort>(ref source);
    return LoadUnsafe(ref reinterpreted);
}

/// <summary>Reads a vector starting <paramref name="elementOffset" /> elements past <paramref name="source" />, viewing the <see cref="char"/> data as <see cref="ushort"/> elements.</summary>
/// <param name="source">The base reference; <paramref name="elementOffset" /> is applied to it before the load.</param>
/// <param name="elementOffset">The offset, in elements, from <paramref name="source" /> at which the load begins.</param>
/// <returns>The vector read from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<ushort> LoadUnsafe(ref char source, nuint elementOffset)
{
    // Reinterpret the reference so the call binds to the generic ushort load.
    ref ushort reinterpreted = ref Unsafe.As<char, ushort>(ref source);
    return LoadUnsafe(ref reinterpreted, elementOffset);
}

/// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
/// <param name="left">The vector to compare with <paramref name="right" />.</param>
Expand Down Expand Up @@ -2419,6 +2434,27 @@ public static Vector128<sbyte> Shuffle(Vector128<sbyte> vector, Vector128<sbyte>
return result;
}

/// <summary>Creates a new vector by selecting values from an input vector using a set of indices.
/// Behavior is platform-dependent for out-of-range indices.</summary>
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param>
/// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns>
/// <remarks>
/// The fastest available instruction is used (SSSE3 shuffle, Arm64 table lookup, or WASM swizzle).
/// When none is available the portable <c>Vector128.Shuffle</c> is used instead of throwing, so
/// callers work on every platform. Callers must not depend on a specific result for indices outside 0-15.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector128<byte> ShuffleUnsafe(Vector128<byte> vector, Vector128<byte> indices)
{
    if (Ssse3.IsSupported)
    {
        return Ssse3.Shuffle(vector, indices);
    }

    if (AdvSimd.Arm64.IsSupported)
    {
        return AdvSimd.Arm64.VectorTableLookup(vector, indices);
    }

    // Keep the WASM path that the per-file helper this method replaces used to provide.
    if (System.Runtime.Intrinsics.Wasm.PackedSimd.IsSupported)
    {
        return System.Runtime.Intrinsics.Wasm.PackedSimd.Swizzle(vector, indices);
    }

    // No suitable intrinsic: use the portable implementation rather than failing at run time.
    return Vector128.Shuffle(vector, indices);
}

/// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary>
/// <param name="vector">The input vector from which values are selected.</param>
/// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1809,6 +1809,21 @@ public static Vector256<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref source));
}

/// <summary>Reads a vector starting at <paramref name="source" />, viewing the <see cref="char"/> data as <see cref="ushort"/> elements.</summary>
/// <param name="source">The reference at which the load begins.</param>
/// <returns>The vector read from <paramref name="source" />.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector256<ushort> LoadUnsafe(ref char source)
{
    // Reinterpret the reference so the call binds to the generic ushort load.
    ref ushort reinterpreted = ref Unsafe.As<char, ushort>(ref source);
    return LoadUnsafe(ref reinterpreted);
}

/// <summary>Reads a vector starting <paramref name="elementOffset" /> elements past <paramref name="source" />, viewing the <see cref="char"/> data as <see cref="ushort"/> elements.</summary>
/// <param name="source">The base reference; <paramref name="elementOffset" /> is applied to it before the load.</param>
/// <param name="elementOffset">The offset, in elements, from <paramref name="source" /> at which the load begins.</param>
/// <returns>The vector read from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Vector256<ushort> LoadUnsafe(ref char source, nuint elementOffset)
{
    // Reinterpret the reference so the call binds to the generic ushort load.
    ref ushort reinterpreted = ref Unsafe.As<char, ushort>(ref source);
    return LoadUnsafe(ref reinterpreted, elementOffset);
}

/// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
/// <param name="left">The vector to compare with <paramref name="right" />.</param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ ref Unsafe.As<char, byte>(ref Unsafe.Add(ref searchSpace, offset + 1)),
// Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Mula
// Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285
SEARCH_TWO_CHARS:
ref ushort ushortSearchSpace = ref Unsafe.As<char, ushort>(ref searchSpace);
if (Vector256.IsHardwareAccelerated && searchSpaceMinusValueTailLength - Vector256<ushort>.Count >= 0)
{
// Find the last unique (which is not equal to ch1) character
Expand All @@ -89,8 +88,8 @@ ref Unsafe.As<char, byte>(ref Unsafe.Add(ref searchSpace, offset + 1)),
// Make sure we don't go out of bounds
Debug.Assert(offset + ch1ch2Distance + Vector256<ushort>.Count <= searchSpaceLength);

Vector256<ushort> cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref ushortSearchSpace, (nuint)(offset + ch1ch2Distance)));
Vector256<ushort> cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref ushortSearchSpace, (nuint)offset));
Vector256<ushort> cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)));
Vector256<ushort> cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset));
Vector256<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();

// Early out: cmpAnd is all zeros
Expand Down Expand Up @@ -156,8 +155,8 @@ ref Unsafe.As<char, byte>(ref value), (nuint)(uint)valueLength * 2))
// Make sure we don't go out of bounds
Debug.Assert(offset + ch1ch2Distance + Vector128<ushort>.Count <= searchSpaceLength);

Vector128<ushort> cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)(offset + ch1ch2Distance)));
Vector128<ushort> cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)offset));
Vector128<ushort> cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)));
Vector128<ushort> cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset));
Vector128<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();

// Early out: cmpAnd is all zeros
Expand Down Expand Up @@ -254,7 +253,6 @@ ref Unsafe.As<char, byte>(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)),
// Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Mula
// Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285
SEARCH_TWO_CHARS:
ref ushort ushortSearchSpace = ref Unsafe.As<char, ushort>(ref searchSpace);
if (Vector256.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector256<ushort>.Count)
{
offset = searchSpaceMinusValueTailLength - Vector256<ushort>.Count;
Expand All @@ -272,8 +270,8 @@ ref Unsafe.As<char, byte>(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)),
do
{

Vector256<ushort> cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref ushortSearchSpace, (nuint)offset));
Vector256<ushort> cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref ushortSearchSpace, (nuint)(offset + ch1ch2Distance)));
Vector256<ushort> cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset));
Vector256<ushort> cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)));
Vector256<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();

// Early out: cmpAnd is all zeros
Expand Down Expand Up @@ -321,8 +319,8 @@ ref Unsafe.As<char, byte>(ref value), (nuint)(uint)valueLength * 2))

do
{
Vector128<ushort> cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)offset));
Vector128<ushort> cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)(offset + ch1ch2Distance)));
Vector128<ushort> cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset));
Vector128<ushort> cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance)));
Vector128<byte> cmpAnd = (cmpCh1 & cmpCh2).AsByte();

// Early out: cmpAnd is all zeros
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1916,7 +1916,7 @@ private static void MakeSeparatorListVectorized(ReadOnlySpan<char> sourceSpan, r
nuint offset = 0;
nuint lengthToExamine = (uint)sourceSpan.Length;

ref ushort source = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(sourceSpan));
ref char source = ref MemoryMarshal.GetReference(sourceSpan);

Vector128<ushort> v1 = Vector128.Create((ushort)c);
Vector128<ushort> v2 = Vector128.Create((ushort)c2);
Expand Down Expand Up @@ -1947,7 +1947,7 @@ private static void MakeSeparatorListVectorized(ReadOnlySpan<char> sourceSpan, r

while (offset < lengthToExamine)
{
char curr = (char)Unsafe.Add(ref source, offset);
char curr = Unsafe.Add(ref source, offset);
if (curr == c || curr == c2 || curr == c3)
{
sepListBuilder.Append((int)offset);
Expand Down