diff --git a/src/HttpUserAgentParser/HttpUserAgentParser.cs b/src/HttpUserAgentParser/HttpUserAgentParser.cs index 3e6cf6b..7b1e419 100644 --- a/src/HttpUserAgentParser/HttpUserAgentParser.cs +++ b/src/HttpUserAgentParser/HttpUserAgentParser.cs @@ -1,7 +1,10 @@ // Copyright © https://myCSharp.de - all rights reserved +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; namespace MyCSharp.HttpUserAgentParser; @@ -206,45 +209,133 @@ private static bool TryExtractVersion(ReadOnlySpan haystack, out Range ran { range = default; - // Limit search window to avoid scanning entire UA string unnecessarily - const int Window = 128; - if (haystack.Length > Window) - { - haystack = haystack.Slice(0, Window); - } + // Vectorization is used in an optimistic way and specialized to common (trimmed down) user agents. + // When the first two char-vectors don't yield any success, we fall back to the scalar path. + // This penalizes versions that are not found, but has an advantage for versions that are found. + // Vector512 is left out, because there are no common inputs with length 128 or more. + // + // Two short (same size as char) vectors are read, then packed to byte vectors on which the + // operation is done. For short / char the higher byte is not of interest and zero or outside + // the target characters, thus with bytes we can process twice as many elements at once. NOTE(review): if no terminator follows the first digit inside the loaded vector, the shifted bit mask is zero and TrailingZeroCount presumably yields 32, so the computed end can exceed the vector width; confirm and consider falling back to the scalar path. NOTE(review): the high-byte assumption appears to hold for the saturating packs only; the truncating Narrow and UnzipEven fallbacks keep the low byte, so a char such as U+0130 would look like '0'; verify. 
- // Find first digit - int start = -1; - for (int i = 0; i < haystack.Length; i++) + if (Vector256.IsHardwareAccelerated && haystack.Length >= 2 * Vector256.Count) { - char c = haystack[i]; - if (c >= '0' && c <= '9') + ref char ptr = ref MemoryMarshal.GetReference(haystack); + + Vector256 vec = ptr.ReadVector256AsBytes(0); + Vector256 between0and9 = Vector256.LessThan(vec - Vector256.Create((byte)'0'), Vector256.Create((byte)('9' - '0' + 1))); + + if (between0and9 == Vector256.Zero) { - start = i; - break; + goto Scalar; } - } - if (start < 0) + uint bitMask = between0and9.ExtractMostSignificantBits(); + int idx = (int)uint.TrailingZeroCount(bitMask); + Debug.Assert(idx is >= 0 and <= 32); + int start = idx; + + Vector256 byteMask = between0and9 | Vector256.Equals(vec, Vector256.Create((byte)'.')); + byteMask = ~byteMask; + + if (byteMask == Vector256.Zero) + { + goto Scalar; + } + + bitMask = byteMask.ExtractMostSignificantBits(); + bitMask >>= start; + + idx = start + (int)uint.TrailingZeroCount(bitMask); + Debug.Assert(idx is >= 0 and <= 32); + int end = idx; + + range = new Range(start, end); + return true; + } + else if (Vector128.IsHardwareAccelerated && haystack.Length >= 2 * Vector128.Count) { - // No digit found => no version - return false; + ref char ptr = ref MemoryMarshal.GetReference(haystack); + + Vector128 vec = ptr.ReadVector128AsBytes(0); + Vector128 between0and9 = Vector128.LessThan(vec - Vector128.Create((byte)'0'), Vector128.Create((byte)('9' - '0' + 1))); + + if (between0and9 == Vector128.Zero) + { + goto Scalar; + } + + uint bitMask = between0and9.ExtractMostSignificantBits(); + int idx = (int)uint.TrailingZeroCount(bitMask); + Debug.Assert(idx is >= 0 and <= 16); + int start = idx; + + Vector128 byteMask = between0and9 | Vector128.Equals(vec, Vector128.Create((byte)'.')); + byteMask = ~byteMask; + + if (byteMask == Vector128.Zero) + { + goto Scalar; + } + + bitMask = byteMask.ExtractMostSignificantBits(); + bitMask >>= start; + + idx = 
start + (int)uint.TrailingZeroCount(bitMask); + Debug.Assert(idx is >= 0 and <= 16); + int end = idx; + + range = new Range(start, end); + return true; } - // Consume digits and dots after first digit - int end = start + 1; - while (end < haystack.Length) + Scalar: { - char c = haystack[end]; - if (!((c >= '0' && c <= '9') || c == '.')) + // Limit search window to avoid scanning entire UA string unnecessarily + const int Windows = 128; + if (haystack.Length > Windows) { - break; + haystack = haystack.Slice(0, Windows); + } + + int start = -1; + int i = 0; + + for (; i < haystack.Length; ++i) + { + char c = haystack[i]; + if (char.IsBetween(c, '0', '9')) + { + start = i; + break; + } + } + + if (start < 0) + { + // No digit found => no version + return false; + } + + haystack = haystack.Slice(i + 1); + for (i = 0; i < haystack.Length; ++i) + { + char c = haystack[i]; + if (!(char.IsBetween(c, '0', '9') || c == '.')) + { + break; + } } - end++; - } - // Create exclusive end range - range = new Range(start, end); - return true; + i += start + 1; // translate the index from the sliced span back to the original haystack + + if (i == start) + { + return false; + } + + range = new Range(start, i); + return true; + } } } diff --git a/src/HttpUserAgentParser/VectorExtensions.cs b/src/HttpUserAgentParser/VectorExtensions.cs new file mode 100644 index 0000000..c8547d0 --- /dev/null +++ b/src/HttpUserAgentParser/VectorExtensions.cs @@ -0,0 +1,78 @@ +// Copyright © https://myCSharp.de - all rights reserved + +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; + +namespace MyCSharp.HttpUserAgentParser; + +internal static class VectorExtensions +{ + extension(ref char c) + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Vector128 ReadVector128AsBytes(int offset) + { + ref short ptr = ref Unsafe.As(ref c); + +#if NET10_0_OR_GREATER + return Vector128.NarrowWithSaturation( + Vector128.LoadUnsafe(ref ptr, 
(uint)offset), + Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128.Count)) + ).AsByte(); +#else + if (Sse2.IsSupported) + { + return Sse2.PackUnsignedSaturate( + Vector128.LoadUnsafe(ref ptr, (uint)offset), + Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128.Count))); + } + else if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.UnzipEven( + Vector128.LoadUnsafe(ref ptr, (uint)offset).AsByte(), + Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128.Count)).AsByte()); + } + else + { + return Vector128.Narrow( + Vector128.LoadUnsafe(ref ptr, (uint)offset), + Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128.Count)) + ).AsByte(); + } +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Vector256 ReadVector256AsBytes(int offset) + { + ref short ptr = ref Unsafe.As(ref c); + +#if NET10_0_OR_GREATER + return Vector256.NarrowWithSaturation( + Vector256.LoadUnsafe(ref ptr, (uint)offset), + Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256.Count) + ).AsByte(); +#else + if (Avx2.IsSupported) + { + Vector256 tmp = Avx2.PackUnsignedSaturate( + Vector256.LoadUnsafe(ref ptr, (uint)offset), + Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256.Count)); + + Vector256 tmp1 = Avx2.Permute4x64(tmp.AsInt64(), 0b_11_01_10_00); + + return tmp1.AsByte(); + } + else + { + return Vector256.Narrow( + Vector256.LoadUnsafe(ref ptr, (uint)offset), + Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256.Count) + ).AsByte(); + } +#endif + } + } +}