Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 1980c7b

Browse files
EgorBo and stephentoub
authored
Vectorize String.Equals for OrdinalIgnoreCase (#77947)
Co-authored-by: Stephen Toub <[email protected]>
1 parent e319104 commit 1980c7b

File tree

2 files changed

+95
-0
lines changed

2 files changed

+95
-0
lines changed

src/libraries/System.Private.CoreLib/src/System/Globalization/Ordinal.cs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Text.Unicode;
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
8+
using System.Runtime.Intrinsics;
89

910
namespace System.Globalization
1011
{
@@ -75,7 +76,62 @@ internal static int CompareStringIgnoreCaseNonAscii(ref char strA, int lengthA,
7576
return OrdinalCasing.CompareStringIgnoreCase(ref strA, lengthA, ref strB, lengthB);
7677
}
7778

79+
/// <summary>
/// Ordinal, case-insensitive equality check over two equally long UTF-16 buffers that
/// consumes one <see cref="Vector128{T}"/> of chars per iteration while both inputs are ASCII.
/// </summary>
/// <param name="charA">Reference to the first char of the first buffer.</param>
/// <param name="charB">Reference to the first char of the second buffer.</param>
/// <param name="length">Number of chars to compare; asserted to be at least one full vector.</param>
/// <returns><see langword="true"/> if the buffers compare equal; otherwise <see langword="false"/>.</returns>
private static bool EqualsIgnoreCase_Vector128(ref char charA, ref char charB, int length)
{
    Debug.Assert(length >= Vector128<ushort>.Count);
    Debug.Assert(Vector128.IsHardwareAccelerated);

    ref ushort firstA = ref Unsafe.As<char, ushort>(ref charA);
    ref ushort firstB = ref Unsafe.As<char, ushort>(ref charB);

    nuint totalChars = (nuint)length;

    // Largest offset at which a whole vector can still be loaded without running past the end.
    nuint lastFullBlockStart = totalChars - (nuint)Vector128<ushort>.Count;
    nuint offset = 0;

    // Declared outside the loop because the non-ASCII path below inspects the last loaded blocks.
    Vector128<ushort> blockA;
    Vector128<ushort> blockB;

    do
    {
        blockA = Vector128.LoadUnsafe(ref firstA, offset);
        blockB = Vector128.LoadUnsafe(ref firstB, offset);

        // OR-ing the blocks lets a single check cover both inputs.
        if (!Utf16Utility.AllCharsInVector128AreAscii(blockA | blockB))
        {
            // Handled after the loop so the loop body stays small.
            goto NonAsciiEncountered;
        }

        if (!Utf16Utility.Vector128OrdinalIgnoreCaseAscii(blockA, blockB))
        {
            return false;
        }

        offset += (nuint)Vector128<ushort>.Count;
    }
    while (offset <= lastFullBlockStart);

    if (offset == totalChars)
    {
        return true;
    }

    // Fewer than a full vector of chars is left; hand the tail to the general entry point.
    int tailLength = (int)(totalChars - offset);
    return EqualsIgnoreCase(ref Unsafe.Add(ref charA, offset), ref Unsafe.Add(ref charB, offset), tailLength);

NonAsciiEncountered:
    // The combined block contained a non-ASCII char. If one side is nevertheless pure ASCII,
    // the inputs are reported as different without invoking the fallback.
    if (Utf16Utility.AllCharsInVector128AreAscii(blockA) || Utf16Utility.AllCharsInVector128AreAscii(blockB))
    {
        return false;
    }

    // Both blocks contain non-ASCII data: compare everything from the current offset onwards
    // with the general comparison routine and treat a zero result as equality.
    int remaining = (int)(totalChars - offset);
    return CompareStringIgnoreCase(
        ref Unsafe.Add(ref charA, offset), remaining,
        ref Unsafe.Add(ref charB, offset), remaining) == 0;
}
122+
123+
/// <summary>
/// Ordinal, case-insensitive equality check over two UTF-16 buffers of <paramref name="length"/> chars.
/// Dispatches to the Vector128 implementation when it is hardware accelerated and the input
/// holds at least one full vector of chars; otherwise uses the scalar implementation.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool EqualsIgnoreCase(ref char charA, ref char charB, int length)
{
    if (Vector128.IsHardwareAccelerated && length >= Vector128<ushort>.Count)
    {
        return EqualsIgnoreCase_Vector128(ref charA, ref charB, length);
    }

    return EqualsIgnoreCase_Scalar(ref charA, ref charB, length);
}
133+
134+
internal static bool EqualsIgnoreCase_Scalar(ref char charA, ref char charB, int length)
79135
{
80136
IntPtr byteOffset = IntPtr.Zero;
81137

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System.Runtime.CompilerServices;
55
using System.Diagnostics;
6+
using System.Runtime.Intrinsics;
67

78
namespace System.Text.Unicode
89
{
@@ -217,5 +218,43 @@ internal static bool UInt64OrdinalIgnoreCaseAscii(ulong valueA, ulong valueB)
217218
indicator |= 0xFF7F_FF7F_FF7F_FF7Ful;
218219
return (differentBits & indicator) == 0;
219220
}
221+
222+
/// <summary>
/// Returns true iff every one of the 8 UTF-16 code units held in <paramref name="vec"/>
/// is in the ASCII range (0x0000..0x007F).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool AllCharsInVector128AreAscii(Vector128<ushort> vec)
{
    // Every bit above the low seven; any of them being set means the code unit is >= 0x80.
    const ushort NonAsciiMask = 0xFF80;

    Vector128<ushort> nonAsciiBits = vec & Vector128.Create(NonAsciiMask);
    return nonAsciiBits == Vector128<ushort>.Zero;
}
230+
231+
/// <summary>
/// Given two Vector128 values that each hold 8 ASCII UTF-16 characters, returns true iff
/// the two inputs are equal under an ordinal case-insensitive comparison.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool Vector128OrdinalIgnoreCaseAscii(Vector128<ushort> vec1, Vector128<ushort> vec2)
{
    // ASSUMPTION: Caller has validated that input values are ASCII.
    // The vectors are processed as signed bytes; for ASCII input every byte is in 0x00..0x7F
    // (the high byte of each char is zero).
    Vector128<sbyte> bytes1 = vec1.AsSByte();
    Vector128<sbyte> bytes2 = vec2.AsSByte();

    // Adding this bias moves 'A' to sbyte.MinValue, so 'A'..'Z' lands on the 26 smallest sbyte values.
    Vector128<sbyte> bias = Vector128.Create((sbyte)(0x80 - 'A'));

    // Biased value of 'Z'; any biased byte greater than this is not an upper-case letter.
    Vector128<sbyte> biasedZ = Vector128.Create(unchecked((sbyte)(('Z' - 'A') - 0x80)));

    // The bit that separates 'A'..'Z' from 'a'..'z'.
    Vector128<sbyte> caseBit = Vector128.Create((sbyte)0x20);

    // All bits set in every byte lane that is NOT in 'A'..'Z'; zero in lanes that are.
    Vector128<sbyte> notUpper1 = Vector128.GreaterThan(bytes1 + bias, biasedZ);
    Vector128<sbyte> notUpper2 = Vector128.GreaterThan(bytes2 + bias, biasedZ);

    // caseBit & ~notUpper yields 0x20 only in upper-case lanes; adding it lower-cases those lanes
    // and leaves every other lane untouched.
    Vector128<sbyte> lowered1 = bytes1 + Vector128.AndNot(caseBit, notUpper1);
    Vector128<sbyte> lowered2 = bytes2 + Vector128.AndNot(caseBit, notUpper2);

    // Equal iff every lane matches after lower-casing.
    return lowered1 == lowered2;
}
220259
}
221260
}

0 commit comments

Comments
 (0)