diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems index 5db0c043f052..8c97e142e32f 100644 --- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems +++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems @@ -47,6 +47,7 @@ + diff --git a/src/System.Private.CoreLib/shared/System/BitOps.cs b/src/System.Private.CoreLib/shared/System/BitOps.cs new file mode 100644 index 000000000000..39caa592afda --- /dev/null +++ b/src/System.Private.CoreLib/shared/System/BitOps.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; + +using Internal.Runtime.CompilerServices; + +namespace System +{ + internal static class BitOps + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int TrailingZeroCount(int matches) + { + if (Bmi1.IsSupported) + { + return (int)Bmi1.TrailingZeroCount((uint)matches); + } + else // Software fallback + { + // https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup + // uint.MaxValue >> 27 is always in range [0 - 31] so we use Unsafe.AddByteOffset to avoid bounds check + return Unsafe.AddByteOffset( + ref MemoryMarshal.GetReference(TrailingCountMultiplyDeBruijn), + ((uint)((matches & -matches) * 0x077CB531U)) >> 27); + } + } + + private static ReadOnlySpan TrailingCountMultiplyDeBruijn => new byte[32] + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + } +} diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs index 4f076eedf7fe..63a564f0de7d 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.Byte.cs @@ -5,6 +5,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Numerics; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; @@ -32,26 +33,26 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte int valueTailLength = valueLength - 1; int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; - int index = 0; + int offset = 0; while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". - int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, index), valueHead, remainingSearchSpaceLength); + int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueHead, remainingSearchSpaceLength); if (relativeIndex == -1) break; remainingSearchSpaceLength -= relativeIndex; - index += relativeIndex; + offset += relativeIndex; if (remainingSearchSpaceLength <= 0) break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, index + 1), ref valueTail, valueTailLength)) - return index; // The tail matched. Return a successful find. + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailLength)) + return offset; // The tail matched. Return a successful find. remainingSearchSpaceLength--; - index++; + offset++; } return -1; } @@ -64,21 +65,21 @@ public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref by if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. - int index = -1; + int offset = -1; for (int i = 0; i < valueLength; i++) { var tempIndex = IndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength); - if ((uint)tempIndex < (uint)index) + if ((uint)tempIndex < (uint)offset) { - index = tempIndex; + offset = tempIndex; // Reduce space for search, cause we don't care if we find the search value after the index of a previously found value searchSpaceLength = tempIndex; - if (index == 0) + if (offset == 0) break; } } - return index; + return offset; } public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) @@ -89,14 +90,14 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, re if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. - int index = -1; + int offset = -1; for (int i = 0; i < valueLength; i++) { var tempIndex = LastIndexOf(ref searchSpace, Unsafe.Add(ref value, i), searchSpaceLength); - if (tempIndex > index) - index = tempIndex; + if (tempIndex > offset) + offset = tempIndex; } - return index; + return offset; } // Adapted from IndexOf(...) @@ -105,13 +106,12 @@ public static unsafe bool Contains(ref byte searchSpace, byte value, int length) Debug.Assert(length >= 0); uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)((Vector.Count - unaligned) & (Vector.Count - 1)); + nLength = UnalignedByteCountVector(ref searchSpace); } SequentialScan: @@ -119,68 +119,67 @@ public static unsafe bool Contains(ref byte searchSpace, byte value, int length) { nLength -= 8; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 0) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 4) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 5) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 6) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 7)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) { goto Found; } - index += 8; + offset += 8; } if ((byte*)nLength >= (byte*)4) { nLength -= 4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 0) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2) || - uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) || + uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) { goto Found; } - index += 4; + offset += 4; } while ((byte*)nLength > (byte*)0) { nLength -= 1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; - index += 1; + offset += 1; } - if (Vector.IsHardwareAccelerated && ((int)(byte*)index < length)) + if (Vector.IsHardwareAccelerated && ((int)(byte*)offset < length)) { - nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector.Count - 1)); + nLength = (IntPtr)((length - (int)(byte*)offset) & ~(Vector.Count - 1)); - // Get comparison Vector - Vector vComparison = new Vector(value); + Vector values = new Vector(value); - while ((byte*)nLength > (byte*)index) + while ((byte*)nLength > (byte*)offset) { - var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index))); - if (Vector.Zero.Equals(vMatches)) + var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); + if (Vector.Zero.Equals(matches)) { - index += Vector.Count; + offset += Vector.Count; continue; } goto Found; } - if ((int)(byte*)index < length) + if ((int)(byte*)offset < length) { - nLength = (IntPtr)(length - (int)(byte*)index); + nLength = (IntPtr)(length - (int)(byte*)offset); goto SequentialScan; } } @@ -196,107 +195,198 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) Debug.Assert(length >= 0); uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + if (Avx2.IsSupported || Sse2.IsSupported) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)((Vector.Count - unaligned) & (Vector.Count - 1)); + // Avx2 branch also operates on Sse2 sizes, so check is combined. + if (length >= Vector128.Count * 2) + { + nLength = UnalignedByteCountVector128(ref searchSpace); + } + } + else if (Vector.IsHardwareAccelerated) + { + if (length >= Vector.Count * 2) + { + nLength = UnalignedByteCountVector(ref searchSpace); + } } SequentialScan: while ((byte*)nLength >= (byte*)8) { nLength -= 8; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 4)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 5)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 6)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 7)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) goto Found7; - index += 8; + offset += 8; } if ((byte*)nLength >= (byte*)4) { nLength -= 4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) goto Found3; - index += 4; + offset += 4; } while ((byte*)nLength > (byte*)0) { nLength -= 1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; - index += 1; + offset += 1; } - if (Vector.IsHardwareAccelerated && ((int)(byte*)index < length)) + if (Avx2.IsSupported) { - nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector.Count - 1)); + if ((int)(byte*)offset < length) + { + nLength = GetByteVector256SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector256 values = Vector256.Create(value); + do + { + Vector256 search = LoadVector256(ref searchSpace, offset); + int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search)); + if (matches == 0) + { + offset += Vector256.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } while ((byte*)nLength > (byte*)offset); + } - // Get comparison Vector - Vector vComparison = new Vector(value); + nLength = GetByteVector128SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector128 values = Vector128.Create(value); + Vector128 search = LoadVector128(ref searchSpace, offset); + + int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); + if (matches == 0) + { + offset += Vector128.Count; + } + else + { + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } + } - while ((byte*)nLength > (byte*)index) - { - var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index))); - if (Vector.Zero.Equals(vMatches)) + if ((int)(byte*)offset < length) { - index += Vector.Count; - continue; + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; } - // Find offset of first match - return (int)(byte*)index + LocateFirstFoundByte(vMatches); } + } + else if (Sse2.IsSupported) + { + if ((int)(byte*)offset < length) + { + nLength = GetByteVector128SpanLength(offset, length); - if ((int)(byte*)index < length) + Vector128 values = Vector128.Create(value); + while ((byte*)nLength > (byte*)offset) + { + Vector128 search = LoadVector128(ref searchSpace, offset); + + int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search)); + if (matches == 0) + { + offset += Vector128.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } + + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } + } + } + else if (Vector.IsHardwareAccelerated) + { + if ((int)(byte*)offset < length) { - nLength = (IntPtr)(length - (int)(byte*)index); - goto SequentialScan; + nLength = GetByteVectorSpanLength(offset, length); + + Vector values = new Vector(value); + + while ((byte*)nLength > (byte*)offset) + { + var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); + if (Vector.Zero.Equals(matches)) + { + offset += Vector.Count; + continue; + } + + // Find offset of first match + return (int)(byte*)offset + LocateFirstFoundByte(matches); + } + + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); + } public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) @@ -311,11 +401,11 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b ref byte valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; - int index = 0; + int offset = 0; for (; ; ) { - Debug.Assert(0 <= index && index <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". - int remainingSearchSpaceLength = searchSpaceLength - index - valueTailLength; + Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". + int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; if (remainingSearchSpaceLength <= 0) break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. @@ -328,7 +418,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailLength)) return relativeIndex; // The tail matched. Return a successful find. - index += remainingSearchSpaceLength - relativeIndex; + offset += remainingSearchSpaceLength - relativeIndex; } return -1; } @@ -338,104 +428,103 @@ public static unsafe int LastIndexOf(ref byte searchSpace, byte value, int lengt Debug.Assert(length >= 0); uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); + nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length); } SequentialScan: while ((byte*)nLength >= (byte*)8) { nLength -= 8; - index -= 8; + offset -= 8; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 7)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) goto Found7; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 6)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 5)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 4)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; } if ((byte*)nLength >= (byte*)4) { nLength -= 4; - index -= 4; + offset -= 4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 3)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 2)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index + 1)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; } while ((byte*)nLength > (byte*)0) { nLength -= 1; - index -= 1; + offset -= 1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, index)) + if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) goto Found; } - if (Vector.IsHardwareAccelerated && ((byte*)index > (byte*)0)) + if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0)) { - nLength = (IntPtr)((int)(byte*)index & ~(Vector.Count - 1)); + nLength = (IntPtr)((int)(byte*)offset & ~(Vector.Count - 1)); - // Get comparison Vector - Vector vComparison = new Vector(value); + Vector values = new Vector(value); while ((byte*)nLength > (byte*)(Vector.Count - 1)) { - var vMatches = Vector.Equals(vComparison, Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index - Vector.Count))); - if (Vector.Zero.Equals(vMatches)) + var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - Vector.Count)); + if (Vector.Zero.Equals(matches)) { - index -= Vector.Count; + offset -= Vector.Count; nLength -= Vector.Count; continue; } + // Find offset of first match - return (int)(index) - Vector.Count + LocateLastFoundByte(vMatches); + return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } - if ((byte*)index > (byte*)0) + if ((byte*)offset > (byte*)0) { - nLength = index; + nLength = offset; goto SequentialScan; } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); } public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int length) @@ -444,13 +533,23 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + if (Avx2.IsSupported || Sse2.IsSupported) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)((Vector.Count - unaligned) & (Vector.Count - 1)); + // Avx2 branch also operates on Sse2 sizes, so check is combined. + if (length >= Vector128.Count * 2) + { + nLength = UnalignedByteCountVector128(ref searchSpace); + } + } + else if (Vector.IsHardwareAccelerated) + { + if (length >= Vector.Count * 2) + { + nLength = UnalignedByteCountVector(ref searchSpace); + } } SequentialScan: uint lookUp; @@ -458,111 +557,197 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu { nLength -= 8; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 4); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); if (uValue0 == lookUp || uValue1 == lookUp) goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 5); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); if (uValue0 == lookUp || uValue1 == lookUp) goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 6); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); if (uValue0 == lookUp || uValue1 == lookUp) goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 7); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); if (uValue0 == lookUp || uValue1 == lookUp) goto Found7; - index += 8; + offset += 8; } if ((byte*)nLength >= (byte*)4) { nLength -= 4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp) goto Found3; - index += 4; + offset += 4; } while ((byte*)nLength > (byte*)0) { nLength -= 1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; - index += 1; + offset += 1; } - if (Vector.IsHardwareAccelerated && ((int)(byte*)index < length)) + if (Avx2.IsSupported) { - nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector.Count - 1)); + if ((int)(byte*)offset < length) + { + nLength = GetByteVector256SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector256 values0 = Vector256.Create(value0); + Vector256 values1 = Vector256.Create(value1); + do + { + Vector256 search = LoadVector256(ref searchSpace, offset); + int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); + matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); + if (matches == 0) + { + offset += Vector256.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } while ((byte*)nLength > (byte*)offset); + } - // Get comparison Vector - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); + nLength = GetByteVector128SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); + + Vector128 search = LoadVector128(ref searchSpace, offset); + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + if (matches == 0) + { + offset += Vector128.Count; + } + else + { + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } + } - while ((byte*)nLength > (byte*)index) - { - Vector vData = Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index)); - var vMatches = Vector.BitwiseOr( - Vector.Equals(vData, values0), - Vector.Equals(vData, values1)); - if (Vector.Zero.Equals(vMatches)) + if ((int)(byte*)offset < length) { - index += Vector.Count; - continue; + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; } - // Find offset of first match - return (int)(byte*)index + LocateFirstFoundByte(vMatches); } + } + else if (Sse2.IsSupported) + { + if ((int)(byte*)offset < length) + { + nLength = GetByteVector128SpanLength(offset, length); + + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); - if ((int)(byte*)index < length) + while ((byte*)nLength > (byte*)offset) + { + Vector128 search = LoadVector128(ref searchSpace, offset); + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + if (matches == 0) + { + offset += Vector128.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } + + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } + } + } + else if (Vector.IsHardwareAccelerated) + { + if ((int)(byte*)offset < length) { - nLength = (IntPtr)(length - (int)(byte*)index); - goto SequentialScan; + nLength = GetByteVectorSpanLength(offset, length); + + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + + while ((byte*)nLength > (byte*)offset) + { + Vector search = LoadVector(ref searchSpace, offset); + var matches = Vector.BitwiseOr( + Vector.Equals(search, values0), + Vector.Equals(search, values1)); + if (Vector.Zero.Equals(matches)) + { + offset += Vector.Count; + continue; + } + + // Find offset of first match + return (int)(byte*)offset + LocateFirstFoundByte(matches); + } + + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); } public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length) @@ -572,13 +757,23 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) + if (Avx2.IsSupported || Sse2.IsSupported) + { + // Avx2 branch also operates on Sse2 sizes, so check is combined. + if (length >= Vector128.Count * 2) + { + nLength = UnalignedByteCountVector128(ref searchSpace); + } + } + else if (Vector.IsHardwareAccelerated) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)((Vector.Count - unaligned) & (Vector.Count - 1)); + if (length >= Vector.Count * 2) + { + nLength = UnalignedByteCountVector(ref searchSpace); + } } SequentialScan: uint lookUp; @@ -586,116 +781,208 @@ public static unsafe int IndexOfAny(ref byte searchSpace, byte value0, byte valu { nLength -= 8; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 4); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 5); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 6); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 7); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found7; - index += 8; + offset += 8; } if ((byte*)nLength >= (byte*)4) { nLength -= 4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found3; - index += 4; + offset += 4; } while ((byte*)nLength > (byte*)0) { nLength -= 1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; - index += 1; + offset += 1; } - if (Vector.IsHardwareAccelerated && ((int)(byte*)index < length)) + if (Avx2.IsSupported) { - nLength = (IntPtr)((length - (int)(byte*)index) & ~(Vector.Count - 1)); + if ((int)(byte*)offset < length) + { + nLength = GetByteVector256SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector256 values0 = Vector256.Create(value0); + Vector256 values1 = Vector256.Create(value1); + Vector256 values2 = Vector256.Create(value2); + do + { + Vector256 search = LoadVector256(ref searchSpace, offset); + int matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search)); + matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search)); + matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search)); + if (matches == 0) + { + offset += Vector256.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } while ((byte*)nLength > (byte*)offset); + } - // Get comparison Vector - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); + nLength = GetByteVector128SpanLength(offset, length); + if ((byte*)nLength > (byte*)offset) + { + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); + Vector128 values2 = Vector128.Create(value2); + + Vector128 search = LoadVector128(ref searchSpace, offset); + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); + if (matches == 0) + { + offset += Vector128.Count; + } + else + { + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); + } + } - while ((byte*)nLength > (byte*)index) + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } + } + } + else if (Sse2.IsSupported) + { + if ((int)(byte*)offset < length) { - Vector vData = Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index)); + nLength = GetByteVector128SpanLength(offset, length); - var vMatches = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(vData, values0), - Vector.Equals(vData, values1)), - Vector.Equals(vData, values2)); + Vector128 values0 = Vector128.Create(value0); + Vector128 values1 = Vector128.Create(value1); + Vector128 values2 = Vector128.Create(value2); - if (Vector.Zero.Equals(vMatches)) + while ((byte*)nLength > (byte*)offset) { - index += Vector.Count; - continue; + Vector128 search = LoadVector128(ref searchSpace, offset); + int matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search)); + matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search)); + if (matches == 0) + { + offset += Vector128.Count; + continue; + } + + // Find offset of first match + return ((int)(byte*)offset) + BitOps.TrailingZeroCount(matches); } - // Find offset of first match - return (int)(byte*)index + LocateFirstFoundByte(vMatches); - } - if ((int)(byte*)index < length) + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } + } + } + else if (Vector.IsHardwareAccelerated) + { + if ((int)(byte*)offset < length) { - nLength = (IntPtr)(length - (int)(byte*)index); - goto SequentialScan; + nLength = GetByteVectorSpanLength(offset, length); + + Vector values0 = new Vector(value0); + Vector values1 = new Vector(value1); + Vector values2 = new Vector(value2); + + while ((byte*)nLength > (byte*)offset) + { + Vector search = LoadVector(ref searchSpace, offset); + + var matches = Vector.BitwiseOr( + Vector.BitwiseOr( + Vector.Equals(search, values0), + Vector.Equals(search, values1)), + Vector.Equals(search, values2)); + + if (Vector.Zero.Equals(matches)) + { + offset += Vector.Count; + continue; + } + + // Find offset of first match + return (int)(byte*)offset + LocateFirstFoundByte(matches); + } + + if ((int)(byte*)offset < length) + { + nLength = (IntPtr)(length - (int)(byte*)offset); + goto SequentialScan; + } } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); } public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, int length) @@ -704,43 +991,42 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); + nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length); } SequentialScan: uint lookUp; while ((byte*)nLength >= (byte*)8) { nLength -= 8; - index -= 8; + offset -= 8; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 7); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); if (uValue0 == lookUp || uValue1 == lookUp) goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 6); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); if (uValue0 == lookUp || uValue1 == lookUp) goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 5); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); if (uValue0 == lookUp || uValue1 == lookUp) goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 4); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); if (uValue0 == lookUp || uValue1 == lookUp) goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; } @@ -748,18 +1034,18 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte if ((byte*)nLength >= (byte*)4) { nLength -= 4; - index -= 4; + offset -= 4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; } @@ -767,60 +1053,60 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte while ((byte*)nLength > (byte*)0) { nLength -= 1; - index -= 1; + offset -= 1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp) goto Found; } - if (Vector.IsHardwareAccelerated && ((byte*)index > (byte*)0)) + if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0)) { - nLength = (IntPtr)((int)(byte*)index & ~(Vector.Count - 1)); + nLength = (IntPtr)((int)(byte*)offset & ~(Vector.Count - 1)); - // Get comparison Vector Vector values0 = new Vector(value0); Vector values1 = new Vector(value1); while ((byte*)nLength > (byte*)(Vector.Count - 1)) { - Vector vData = Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index - Vector.Count)); - var vMatches = Vector.BitwiseOr( - Vector.Equals(vData, values0), - Vector.Equals(vData, values1)); - if (Vector.Zero.Equals(vMatches)) + Vector search = LoadVector(ref searchSpace, offset - Vector.Count); + var matches = Vector.BitwiseOr( + Vector.Equals(search, values0), + Vector.Equals(search, values1)); + if (Vector.Zero.Equals(matches)) { - index -= Vector.Count; + offset -= Vector.Count; nLength -= Vector.Count; continue; } + // Find offset of first match - return (int)(index) - Vector.Count + LocateLastFoundByte(vMatches); + return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } - if ((byte*)index > (byte*)0) + if ((byte*)offset > (byte*)0) { - nLength = index; + nLength = offset; goto SequentialScan; } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); } public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte value1, byte value2, int length) @@ -830,43 +1116,42 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions - IntPtr index = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr offset = (IntPtr)length; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations IntPtr nLength = (IntPtr)length; if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) { - int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - nLength = (IntPtr)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); + nLength = UnalignedByteCountVectorFromEnd(ref searchSpace, length); } SequentialScan: uint lookUp; while ((byte*)nLength >= (byte*)8) { nLength -= 8; - index -= 8; + offset -= 8; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 7); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 6); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 5); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 4); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; } @@ -874,18 +1159,18 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte if ((byte*)nLength >= (byte*)4) { nLength -= 4; - index -= 4; + offset -= 4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 3); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 2); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index + 1); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; } @@ -893,65 +1178,65 @@ public static unsafe int LastIndexOfAny(ref byte searchSpace, byte value0, byte while ((byte*)nLength > (byte*)0) { nLength -= 1; - index -= 1; + offset -= 1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, index); + lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) goto Found; } - if (Vector.IsHardwareAccelerated && ((byte*)index > (byte*)0)) + if (Vector.IsHardwareAccelerated && ((byte*)offset > (byte*)0)) { - nLength = (IntPtr)((int)(byte*)index & ~(Vector.Count - 1)); + nLength = (IntPtr)((int)(byte*)offset & ~(Vector.Count - 1)); - // Get comparison Vector Vector values0 = new Vector(value0); Vector values1 = new Vector(value1); Vector values2 = new Vector(value2); while ((byte*)nLength > (byte*)(Vector.Count - 1)) { - Vector vData = Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref searchSpace, index - Vector.Count)); + Vector search = LoadVector(ref searchSpace, offset - Vector.Count); - var vMatches = Vector.BitwiseOr( + var matches = Vector.BitwiseOr( Vector.BitwiseOr( - Vector.Equals(vData, values0), - Vector.Equals(vData, values1)), - Vector.Equals(vData, values2)); + Vector.Equals(search, values0), + Vector.Equals(search, values1)), + Vector.Equals(search, values2)); - if (Vector.Zero.Equals(vMatches)) + if (Vector.Zero.Equals(matches)) { - index -= Vector.Count; + offset -= Vector.Count; nLength -= Vector.Count; continue; } + // Find offset of first match - return (int)(index) - Vector.Count + LocateLastFoundByte(vMatches); + return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); } - if ((byte*)index > (byte*)0) + if ((byte*)offset > (byte*)0) { - nLength = index; + nLength = offset; goto SequentialScan; } } return -1; Found: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - return (int)(byte*)index; + return (int)(byte*)offset; Found1: - return (int)(byte*)(index + 1); + return (int)(byte*)(offset + 1); Found2: - return (int)(byte*)(index + 2); + return (int)(byte*)(offset + 2); Found3: - return (int)(byte*)(index + 3); + return (int)(byte*)(offset + 3); Found4: - return (int)(byte*)(index + 4); + return (int)(byte*)(offset + 4); Found5: - return (int)(byte*)(index + 5); + return (int)(byte*)(offset + 5); Found6: - return (int)(byte*)(index + 6); + return (int)(byte*)(offset + 6); Found7: - return (int)(byte*)(index + 7); + return (int)(byte*)(offset + 7); } // Optimized byte-based SequenceEquals. The "length" parameter for this one is declared a nuint rather than int as we also use it for types other than byte @@ -961,51 +1246,46 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l if (Unsafe.AreSame(ref first, ref second)) goto Equal; - IntPtr i = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations - IntPtr n = (IntPtr)(void*)length; + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr nLength = (IntPtr)(void*)length; - if (Vector.IsHardwareAccelerated && (byte*)n >= (byte*)Vector.Count) + if (Vector.IsHardwareAccelerated && (byte*)nLength >= (byte*)Vector.Count) { - n -= Vector.Count; - while ((byte*)n > (byte*)i) + nLength -= Vector.Count; + while ((byte*)nLength > (byte*)offset) { - if (Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref first, i)) != - Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref second, i))) + if (LoadVector(ref first, offset) != LoadVector(ref second, offset)) { goto NotEqual; } - i += Vector.Count; + offset += Vector.Count; } - return Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref first, n)) == - Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref second, n)); + return LoadVector(ref first, nLength) == LoadVector(ref second, nLength); } - if ((byte*)n >= (byte*)sizeof(UIntPtr)) + if ((byte*)nLength >= (byte*)sizeof(UIntPtr)) { - n -= sizeof(UIntPtr); - while ((byte*)n > (byte*)i) + nLength -= sizeof(UIntPtr); + while ((byte*)nLength > (byte*)offset) { - if (Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref first, i)) != - Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref second, i))) + if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset)) { goto NotEqual; } - i += sizeof(UIntPtr); + offset += sizeof(UIntPtr); } - return Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref first, n)) == - Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref second, n)); + return LoadUIntPtr(ref first, nLength) == LoadUIntPtr(ref second, nLength); } - while ((byte*)n > (byte*)i) + while ((byte*)nLength > (byte*)offset) { - if (Unsafe.AddByteOffset(ref first, i) != Unsafe.AddByteOffset(ref second, i)) + if (Unsafe.AddByteOffset(ref first, offset) != Unsafe.AddByteOffset(ref second, offset)) goto NotEqual; - i += 1; + offset += 1; } Equal: return true; - NotEqual: // Workaround for https://github.com/dotnet/coreclr/issues/13549 return false; } @@ -1041,45 +1321,43 @@ public static unsafe int SequenceCompareTo(ref byte first, int firstLength, ref IntPtr minLength = (IntPtr)((firstLength < secondLength) ? firstLength : secondLength); - IntPtr i = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations - IntPtr n = (IntPtr)(void*)minLength; + IntPtr offset = (IntPtr)0; // Use IntPtr for arithmetic to avoid unnecessary 64->32->64 truncations + IntPtr nLength = (IntPtr)(void*)minLength; - if (Vector.IsHardwareAccelerated && (byte*)n > (byte*)Vector.Count) + if (Vector.IsHardwareAccelerated && (byte*)nLength > (byte*)Vector.Count) { - n -= Vector.Count; - while ((byte*)n > (byte*)i) + nLength -= Vector.Count; + while ((byte*)nLength > (byte*)offset) { - if (Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref first, i)) != - Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref second, i))) + if (LoadVector(ref first, offset) != LoadVector(ref second, offset)) { goto NotEqual; } - i += Vector.Count; + offset += Vector.Count; } goto NotEqual; } - if ((byte*)n > (byte*)sizeof(UIntPtr)) + if ((byte*)nLength > (byte*)sizeof(UIntPtr)) { - n -= sizeof(UIntPtr); - while ((byte*)n > (byte*)i) + nLength -= sizeof(UIntPtr); + while ((byte*)nLength > (byte*)offset) { - if (Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref first, i)) != - Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref second, i))) + if (LoadUIntPtr(ref first, offset) != LoadUIntPtr(ref second, offset)) { goto NotEqual; } - i += sizeof(UIntPtr); + offset += sizeof(UIntPtr); } } NotEqual: // Workaround for https://github.com/dotnet/coreclr/issues/13549 - while ((byte*)minLength > (byte*)i) + while ((byte*)minLength > (byte*)offset) { - int result = Unsafe.AddByteOffset(ref first, i).CompareTo(Unsafe.AddByteOffset(ref second, i)); + int result = Unsafe.AddByteOffset(ref first, offset).CompareTo(Unsafe.AddByteOffset(ref second, offset)); if (result != 0) return result; - i += 1; + offset += 1; } Equal: @@ -1152,5 +1430,54 @@ private static int LocateLastFoundByte(ulong match) 0x03ul << 32 | 0x02ul << 40 | 0x01ul << 48) + 1; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe UIntPtr LoadUIntPtr(ref byte start, IntPtr offset) + => Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref start, offset)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe Vector LoadVector(ref byte start, IntPtr offset) + => Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref start, offset)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe Vector128 LoadVector128(ref byte start, IntPtr offset) + => Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref start, offset)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe Vector256 LoadVector256(ref byte start, IntPtr offset) + => Unsafe.ReadUnaligned>(ref Unsafe.AddByteOffset(ref start, offset)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr GetByteVectorSpanLength(IntPtr offset, int length) + => (IntPtr)((length - (int)(byte*)offset) & ~(Vector.Count - 1)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr GetByteVector128SpanLength(IntPtr offset, int length) + => (IntPtr)((length - (int)(byte*)offset) & ~(Vector128.Count - 1)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr GetByteVector256SpanLength(IntPtr offset, int length) + => (IntPtr)((length - (int)(byte*)offset) & ~(Vector256.Count - 1)); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr UnalignedByteCountVector(ref byte searchSpace) + { + int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); + return (IntPtr)((Vector.Count - unaligned) & (Vector.Count - 1)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr UnalignedByteCountVector128(ref byte searchSpace) + { + int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector128.Count - 1); + return (IntPtr)((Vector128.Count - unaligned) & (Vector128.Count - 1)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe IntPtr UnalignedByteCountVectorFromEnd(ref byte searchSpace, int length) + { + int unaligned = (int)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); + return (IntPtr)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); + } } } diff --git a/src/System.Private.CoreLib/shared/System/SpanHelpers.cs b/src/System.Private.CoreLib/shared/System/SpanHelpers.cs index 0d5c57c5db90..9ea42ec39f15 100644 --- a/src/System.Private.CoreLib/shared/System/SpanHelpers.cs +++ b/src/System.Private.CoreLib/shared/System/SpanHelpers.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. using System.Diagnostics; -using System.Globalization; using System.Runtime; using Internal.Runtime.CompilerServices;