Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 119 additions & 28 deletions src/HttpUserAgentParser/HttpUserAgentParser.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
// Copyright © https://myCSharp.de - all rights reserved

using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace MyCSharp.HttpUserAgentParser;

Expand Down Expand Up @@ -206,45 +209,133 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran
{
range = default;

// Limit search window to avoid scanning entire UA string unnecessarily
const int Window = 128;
if (haystack.Length > Window)
{
haystack = haystack.Slice(0, Window);
}
// Vectorization is used in an optimistic way and specialized to common (trimmed down) user agents.
// When the first two char-vectors don't yield any success, we fall back to the scalar path.
// This penalizes versions that are not found, but is an advantage for versions that are found.
// Vector512 is left out, because there are no common inputs with length 128 or more.
//
// Two short (same size as char) vectors are read, then packed to byte vectors on which the
// operation is done. For short / char the higher byte is not of interest and is either zero or outside
// the target characters, thus with bytes we can process twice as many elements at once.

// Find first digit
int start = -1;
for (int i = 0; i < haystack.Length; i++)
if (Vector256.IsHardwareAccelerated && haystack.Length >= 2 * Vector256<short>.Count)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure the implementation is correct, but should we add some kind of toggle (env variable) with which vectorization can be disabled in case there's a bug in the code (i.e. from some strange user agent that we don't know at the moment), so a user could still use the lib w/o the need to wait for hotfix release?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the environment variable is visible to users. It's kind of hidden black magic and users will remove the library without checking. For me it feels the IndexOf way is the most stable and a very fast solution for now, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IndexOf is quite good, but for the 11.0) like Gecko benchmark it's +75% slower and such short versions are quite common.

The specialized vectorized code is nice, but it's a lot of hard-to-maintain code, so I actually don't like this approach very much.

I'd like to keep this PR open for a moment, so I'll be able to explore other approaches for speed-up too.
The easiest one is to shorten the "window" in

const int Window = 128;
if (haystack.Length > Window)
{
haystack = haystack.Slice(0, Window);
}
, so that a) vectorization of IndexOf can still be used and b) it's no longer than the longest assumed version.

Furthermore, the biggest speed-up may come from no longer scanning linearly through all possible patterns.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Furthermore, the biggest speed-up may come from no longer scanning linearly through all possible patterns.

In my head I'm ready with such an approach, but I need to finish a work-project, then I'll prototype it and see how it goes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feel free, I have complete confidence in you; everything looks impressively good!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first try with that idea is nice, as the user agent only needs to be scanned once ($O(n)$) and not once for every possibility.
But for some browsers, where we depend on the order in the arrays, the wrong result is yielded. So I have to research a bit more.

Some things may also move around (without API-breaking changes), so I'd like to leave this PR open so that either everything can be done together or only parts of it.

I hope to continue on this in the coming weeks.

{
char c = haystack[i];
if (c >= '0' && c <= '9')
ref char ptr = ref MemoryMarshal.GetReference(haystack);

Vector256<byte> vec = ptr.ReadVector256AsBytes(0);
Vector256<byte> between0and9 = Vector256.LessThan(vec - Vector256.Create((byte)'0'), Vector256.Create((byte)('9' - '0' + 1)));

if (between0and9 == Vector256<byte>.Zero)
{
start = i;
break;
goto Scalar;
}
}

if (start < 0)
uint bitMask = between0and9.ExtractMostSignificantBits();
int idx = (int)uint.TrailingZeroCount(bitMask);
Debug.Assert(idx is >= 0 and <= 32);
int start = idx;

Vector256<byte> byteMask = between0and9 | Vector256.Equals(vec, Vector256.Create((byte)'.'));
byteMask = ~byteMask;

if (byteMask == Vector256<byte>.Zero)
{
goto Scalar;
}

bitMask = byteMask.ExtractMostSignificantBits();
bitMask >>= start;

idx = start + (int)uint.TrailingZeroCount(bitMask);
Debug.Assert(idx is >= 0 and <= 32);
int end = idx;

range = new Range(start, end);
return true;
}
else if (Vector128.IsHardwareAccelerated && haystack.Length >= 2 * Vector128<short>.Count)
{
// No digit found => no version
return false;
ref char ptr = ref MemoryMarshal.GetReference(haystack);

Vector128<byte> vec = ptr.ReadVector128AsBytes(0);
Vector128<byte> between0and9 = Vector128.LessThan(vec - Vector128.Create((byte)'0'), Vector128.Create((byte)('9' - '0' + 1)));

if (between0and9 == Vector128<byte>.Zero)
{
goto Scalar;
}

uint bitMask = between0and9.ExtractMostSignificantBits();
int idx = (int)uint.TrailingZeroCount(bitMask);
Debug.Assert(idx is >= 0 and <= 16);
int start = idx;

Vector128<byte> byteMask = between0and9 | Vector128.Equals(vec, Vector128.Create((byte)'.'));
byteMask = ~byteMask;

if (byteMask == Vector128<byte>.Zero)
{
goto Scalar;
}

bitMask = byteMask.ExtractMostSignificantBits();
bitMask >>= start;

idx = start + (int)uint.TrailingZeroCount(bitMask);
Debug.Assert(idx is >= 0 and <= 16);
int end = idx;

range = new Range(start, end);
return true;
}

// Consume digits and dots after first digit
int end = start + 1;
while (end < haystack.Length)
Scalar:
{
char c = haystack[end];
if (!((c >= '0' && c <= '9') || c == '.'))
// Limit search window to avoid scanning entire UA string unnecessarily
const int Windows = 128;
if (haystack.Length > Windows)
{
break;
haystack = haystack.Slice(0, Windows);
}

int start = -1;
int i = 0;

for (; i < haystack.Length; ++i)
{
char c = haystack[i];
if (char.IsBetween(c, '0', '9'))
{
start = i;
break;
}
}

if (start < 0)
{
// No digit found => no version
return false;
}

haystack = haystack.Slice(i + 1);
for (i = 0; i < haystack.Length; ++i)
{
char c = haystack[i];
if (!(char.IsBetween(c, '0', '9') || c == '.'))
{
break;
}
}
end++;
}

// Create exclusive end range
range = new Range(start, end);
return true;
i += start + 1; // shift back the previous domain

if (i == start)
{
return false;
}

range = new Range(start, i);
return true;
}
}
}
78 changes: 78 additions & 0 deletions src/HttpUserAgentParser/VectorExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright © https://myCSharp.de - all rights reserved

using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;

namespace MyCSharp.HttpUserAgentParser;

/// <summary>
/// Extension helpers that load UTF-16 chars and pack them into byte vectors, so that
/// twice as many elements can be inspected per vector operation.
/// </summary>
/// <remarks>
/// <para>
/// Chars in the ASCII range (0x00..0x7F) keep their value. Every other char is narrowed
/// with saturation, i.e. it never aliases onto an ASCII byte by losing its high byte.
/// The concrete saturated value depends on the code path (0x00, 0x7F, 0x80 or 0xFF),
/// therefore callers must only compare the result against values in the range 0x01..0x7E.
/// </para>
/// <para>
/// No bounds checks are performed. The caller has to guarantee that the requested
/// number of chars is readable starting at the given offset.
/// </para>
/// </remarks>
internal static class VectorExtensions
{
    extension(ref char c)
    {
        /// <summary>
        /// Reads <c>2 * Vector128&lt;short&gt;.Count</c> chars starting at <paramref name="offset"/>
        /// and packs them, in order, into a single byte vector.
        /// </summary>
        /// <param name="offset">Element offset (in chars) relative to the referenced char.</param>
        /// <returns>The packed bytes; non-ASCII chars are saturated instead of truncated.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Vector128<byte> ReadVector128AsBytes(int offset)
        {
            ref short ptr = ref Unsafe.As<char, short>(ref c);

            Vector128<short> lower = Vector128.LoadUnsafe(ref ptr, (uint)offset);
            Vector128<short> upper = Vector128.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector128<short>.Count);

#if NET10_0_OR_GREATER
            return Vector128.NarrowWithSaturation(lower, upper).AsByte();
#else
            if (Sse2.IsSupported)
            {
                return Sse2.PackUnsignedSaturate(lower, upper);
            }
            else if (AdvSimd.IsSupported)
            {
                // A plain unzip / truncation would map e.g. U+0130 onto '0' (0x30).
                // The saturating narrow keeps non-ASCII chars outside the ASCII range.
                Vector64<byte> lowerBytes = AdvSimd.ExtractNarrowingSaturateUnsignedLower(lower);
                return AdvSimd.ExtractNarrowingSaturateUnsignedUpper(lowerBytes, upper);
            }
            else
            {
                // Vector128.Narrow truncates, so clamp to byte.MaxValue first to emulate saturation.
                Vector128<ushort> max = Vector128.Create((ushort)byte.MaxValue);

                return Vector128.Narrow(
                    Vector128.Min(lower.AsUInt16(), max),
                    Vector128.Min(upper.AsUInt16(), max));
            }
#endif
        }

        /// <summary>
        /// Reads <c>2 * Vector256&lt;short&gt;.Count</c> chars starting at <paramref name="offset"/>
        /// and packs them, in order, into a single byte vector.
        /// </summary>
        /// <param name="offset">Element offset (in chars) relative to the referenced char.</param>
        /// <returns>The packed bytes; non-ASCII chars are saturated instead of truncated.</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public Vector256<byte> ReadVector256AsBytes(int offset)
        {
            ref short ptr = ref Unsafe.As<char, short>(ref c);

            Vector256<short> lower = Vector256.LoadUnsafe(ref ptr, (uint)offset);
            Vector256<short> upper = Vector256.LoadUnsafe(ref ptr, (uint)offset + (uint)Vector256<short>.Count);

#if NET10_0_OR_GREATER
            return Vector256.NarrowWithSaturation(lower, upper).AsByte();
#else
            if (Avx2.IsSupported)
            {
                // The AVX2 pack works per 128-bit lane, which interleaves the two sources.
                Vector256<byte> packed = Avx2.PackUnsignedSaturate(lower, upper);

                // Swap the two middle 64-bit blocks to restore the original element order.
                Vector256<long> ordered = Avx2.Permute4x64(packed.AsInt64(), 0b_11_01_10_00);

                return ordered.AsByte();
            }
            else
            {
                // Vector256.Narrow truncates, so clamp to byte.MaxValue first to emulate saturation.
                Vector256<ushort> max = Vector256.Create((ushort)byte.MaxValue);

                return Vector256.Narrow(
                    Vector256.Min(lower.AsUInt16(), max),
                    Vector256.Min(upper.AsUInt16(), max));
            }
#endif
        }
    }
}
Loading