mycsharp · gfoidl · Sep 6, 2025 · Sep 6, 2025 · gfoidl · Sep 6, 2025
diff --git a/src/HttpUserAgentParser/HttpUserAgentParser.cs b/src/HttpUserAgentParser/HttpUserAgentParser.cs
@@ -1,7 +1,10 @@
 // Copyright © https://myCSharp.de - all rights reserved
 
+using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 
 namespace MyCSharp.HttpUserAgentParser;
 
@@ -206,45 +209,133 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran
     {
         range = default;
 
-        // Limit search window to avoid scanning entire UA string unnecessarily
-        const int Window = 128;
-        if (haystack.Length > Window)
-        {
-            haystack = haystack.Slice(0, Window);
-        }
+        // Vectorization is used in an optimistic way and is specialized for common (trimmed down) user agents.
+        // When the first two char-vectors don't yield any success, we fall back to the scalar path.
+        // This penalizes inputs where no version is found, but is an advantage when a version is found.
+        // Vector512 is left out, because there are no common inputs with length 128 or more.
+        //
+        // Two short (same size as char) vectors are read, then packed to byte vectors on which the
+        // operation is done. For short / char the higher byte is not of interest and is zero or outside
+        // the target characters, thus with bytes we can process twice as many elements at once.
 
-        // Find first digit
-        int start = -1;
-        for (int i = 0; i < haystack.Length; i++)
+        if (Vector256.IsHardwareAccelerated && haystack.Length >= 2 * Vector256<short>.Count)
         {
-            char c = haystack[i];
-            if (c >= '0' && c <= '9')
+            ref char ptr = ref MemoryMarshal.GetReference(haystack);
+
+            Vector256<byte> vec = ptr.ReadVector256AsBytes(0);   // first 32 chars, narrowed to one byte each
+            Vector256<byte> between0and9 = Vector256.LessThan(vec - Vector256.Create((byte)'0'), Vector256.Create((byte)('9' - '0' + 1)));
+
+            if (between0and9 == Vector256<byte>.Zero)
             {
-                start = i;
-                break;
+                goto Scalar;    // no digit within the first 32 chars
             }
-        }
 
-        if (start < 0)
+            uint bitMask = between0and9.ExtractMostSignificantBits();
+            int idx = (int)uint.TrailingZeroCount(bitMask);   // position of the first digit
+            Debug.Assert(idx is >= 0 and <= 32);
+            int start = idx;
+
+            Vector256<byte> byteMask = between0and9 | Vector256.Equals(vec, Vector256.Create((byte)'.'));
+            byteMask = ~byteMask;   // set where the char is neither a digit nor '.'
+
+            // Only terminators at or after the first digit matter; earlier ones are shifted out.
+            bitMask = byteMask.ExtractMostSignificantBits() >> start;
+
+            if (bitMask == 0)
+            {
+                goto Scalar;    // version reaches the end of the vector => the scalar path finds the real end
+            }
+
+            idx = start + (int)uint.TrailingZeroCount(bitMask);
+            Debug.Assert(idx is >= 0 and <= 32);
+            int end = idx;
+
+            range = new Range(start, end);
+            return true;
+        }
+        else if (Vector128.IsHardwareAccelerated && haystack.Length >= 2 * Vector128<short>.Count)
         {
-            // No digit found => no version
-            return false;
+            ref char ptr = ref MemoryMarshal.GetReference(haystack);
+
+            Vector128<byte> vec = ptr.ReadVector128AsBytes(0);   // first 16 chars, narrowed to one byte each
+            Vector128<byte> between0and9 = Vector128.LessThan(vec - Vector128.Create((byte)'0'), Vector128.Create((byte)('9' - '0' + 1)));
+
+            if (between0and9 == Vector128<byte>.Zero)
+            {
+                goto Scalar;    // no digit within the first 16 chars
+            }
+
+            uint bitMask = between0and9.ExtractMostSignificantBits();
+            int idx = (int)uint.TrailingZeroCount(bitMask);   // position of the first digit
+            Debug.Assert(idx is >= 0 and <= 16);
+            int start = idx;
+
+            Vector128<byte> byteMask = between0and9 | Vector128.Equals(vec, Vector128.Create((byte)'.'));
+            byteMask = ~byteMask;   // set where the char is neither a digit nor '.'
+
+            // Only terminators at or after the first digit matter; earlier ones are shifted out.
+            bitMask = byteMask.ExtractMostSignificantBits() >> start;
+
+            if (bitMask == 0)
+            {
+                goto Scalar;    // version reaches the end of the vector => the scalar path finds the real end
+            }
+
+            idx = start + (int)uint.TrailingZeroCount(bitMask);
+            Debug.Assert(idx is >= 0 and <= 16);
+            int end = idx;
+
+            range = new Range(start, end);
+            return true;
         }
 
-        // Consume digits and dots after first digit
-        int end = start + 1;
-        while (end < haystack.Length)
+    Scalar:
         {
-            char c = haystack[end];
-            if (!((c >= '0' && c <= '9') || c == '.'))
+            // Scalar fallback: only the first 128 chars are searched, to avoid scanning the entire UA string
+            const int Windows = 128;
+            if (haystack.Length > Windows)
             {
-                break;
+                haystack = haystack.Slice(0, Windows);
+            }
+
+            int start = -1;     // index of the first digit; stays -1 while none is found
+            int i = 0;
+
+            for (; i < haystack.Length; ++i)
+            {
+                char c = haystack[i];
+                if (char.IsBetween(c, '0', '9'))
+                {
+                    start = i;
+                    break;
+                }
+            }
+
+            if (start < 0)
+            {
+                // No digit found => no version
+                return false;
+            }
+
+            haystack = haystack.Slice(i + 1);   // continue right after the first digit; i counts from 0 again
+            for (i = 0; i < haystack.Length; ++i)
+            {
+                char c = haystack[i];
+                if (!(char.IsBetween(c, '0', '9') || c == '.'))
+                {
+                    break;
+                }
             }
-            end++;
-        }
 
-        // Create exclusive end range
-        range = new Range(start, end);
-        return true;
+            i += start + 1;     // translate i back to an index into the span as it was before the Slice
+
+            if (i == start)     // NOTE(review): i >= start + 1 at this point, so this guard looks unreachable
+            {
+                return false;
+            }
+
+            range = new Range(start, i);    // i is the exclusive end of the digit/dot run
+            return true;
+        }
     }
 }
diff --git a/src/HttpUserAgentParser/VectorExtensions.cs b/src/HttpUserAgentParser/VectorExtensions.cs
@@ -0,0 +1,78 @@
+// Copyright © https://myCSharp.de - all rights reserved
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Intrinsics.X86;
+
+namespace MyCSharp.HttpUserAgentParser;
+
+internal static class VectorExtensions   // extension members that load chars and narrow them to byte vectors
+{
+    extension(ref char c)   // c is the base reference; the loads below are offset from it
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Vector128<byte> ReadVector128AsBytes(int offset)   // loads two Vector128<short> starting at offset and narrows them into one byte vector
+        {
+            ref short ptr = ref Unsafe.As<char, short>(ref c);   // reinterpret char as short so the data can be loaded as Vector128<short>
+
+#if NET10_0_OR_GREATER
+            return Vector128.NarrowWithSaturation(   // saturating narrow of both halves
+                Vector128.LoadUnsafe(ref ptr, (uint)offset),
+                Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count))
+            ).AsByte();
+#else
+            if (Sse2.IsSupported)
+            {
+                return Sse2.PackUnsignedSaturate(   // x86: pack with unsigned saturation
+                    Vector128.LoadUnsafe(ref ptr, (uint)offset),
+                    Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count)));
+            }
+            else if (AdvSimd.Arm64.IsSupported)
+            {
+                return AdvSimd.Arm64.UnzipEven(   // Arm64: keeps the even-indexed bytes; NOTE(review): no saturation here, the other byte of each char is dropped - confirm this is acceptable for non-ASCII input
+                    Vector128.LoadUnsafe(ref ptr, (uint)offset).AsByte(),
+                    Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count)).AsByte());
+            }
+            else
+            {
+                return Vector128.Narrow(   // fallback when neither Sse2 nor AdvSimd.Arm64 is supported
+                    Vector128.LoadUnsafe(ref ptr, (uint)offset),
+                    Vector128.LoadUnsafe(ref ptr, (uint)(offset + Vector128<short>.Count))
+                ).AsByte();
+            }
+#endif
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public Vector256<byte> ReadVector256AsBytes(int offset)   // loads two Vector256<short> starting at offset and narrows them into one byte vector
+        {
+            ref short ptr = ref Unsafe.As<char, short>(ref c);   // reinterpret char as short so the data can be loaded as Vector256<short>
+
+#if NET10_0_OR_GREATER
+            return Vector256.NarrowWithSaturation(   // saturating narrow of both halves
+                Vector256.LoadUnsafe(ref ptr, (uint)offset),
+                Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count)
+            ).AsByte();
+#else
+            if (Avx2.IsSupported)
+            {
+                Vector256<byte> tmp = Avx2.PackUnsignedSaturate(   // x86: pack with unsigned saturation
+                    Vector256.LoadUnsafe(ref ptr, (uint)offset),
+                    Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count));
+
+                Vector256<long> tmp1 = Avx2.Permute4x64(tmp.AsInt64(), 0b_11_01_10_00);   // reorder the four 64-bit parts as 0, 2, 1, 3; presumably restores source order after the pack - TODO confirm
+
+                return tmp1.AsByte();
+            }
+            else
+            {
+                return Vector256.Narrow(   // fallback when Avx2 is not supported
+                    Vector256.LoadUnsafe(ref ptr, (uint)offset),
+                    Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count)
+                ).AsByte();
+            }
+#endif
+        }
+    }
+}