diff --git a/README.md b/README.md index c43e521..fe6a97b 100644 --- a/README.md +++ b/README.md @@ -122,21 +122,21 @@ AMD Ryzen 9 9950X, 1 CPU, 32 logical and 16 physical cores Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 -| Method | Categories | Data | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio | -|------------------- |----------- |------------- |----------------:|-----------------:|---------------:|----------:|--------:|---------:|---------:|---------:|-----------:|------------:| -| MyCSharp | Basic | Chrome Win10 | 936.44 ns | 131.253 ns | 7.194 ns | 1.00 | 0.01 | 0.0029 | - | - | 48 B | 1.00 | -| UAParser | Basic | Chrome Win10 | 9,512,347.40 ns | 3,961,045.109 ns | 217,118.249 ns | 10,158.42 | 211.89 | 656.2500 | 546.8750 | 109.3750 | 11523315 B | 240,069.06 | -| DeviceDetector.NET | Basic | Chrome Win10 | 5,428,530.73 ns | 5,276,988.556 ns | 289,249.550 ns | 5,797.23 | 270.29 | 296.8750 | 125.0000 | 31.2500 | 5002239 B | 104,213.31 | -| | | | | | | | | | | | | | -| MyCSharp | Basic | Google-Bot | 165.66 ns | 21.926 ns | 1.202 ns | 1.00 | 0.01 | - | - | - | - | NA | -| UAParser | Basic | Google-Bot | 9,737,403.12 ns | 2,336,698.462 ns | 128,082.328 ns | 58,781.92 | 764.74 | 671.8750 | 656.2500 | 109.3750 | 11877003 B | NA | -| DeviceDetector.NET | Basic | Google-Bot | 6,331,960.42 ns | 1,602,716.199 ns | 87,850.283 ns | 38,224.23 | 518.30 | 500.0000 | 62.5000 | - | 8817013 B | NA | -| | | | | | | | | | | | | | -| MyCSharp | Cached | Chrome Win10 | 26.75 ns | 3.749 ns | 0.205 ns | 1.00 | 0.01 | - | - | - | - | NA | -| UAParser | Cached | Chrome Win10 | 250,039.55 ns | 6,502.182 ns | 356.407 ns | 9,346.54 | 63.39 | 2.1973 | - | - | 37488 B | NA | -| | | | | | | | | | | | | | -| MyCSharp | Cached | Google-Bot | 19.66 ns | 4.312 ns | 0.236 ns | 1.00 | 0.01 | - | - | - | - | NA | -| UAParser | Cached | Google-Bot | 184,991.85 ns | 46,235.986 ns | 2,534.350 ns | 9,408.77 | 148.82 | 2.6855 | - | - | 45857 B | NA | +| Method | Categories | Data | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio | +|------------------- |----------- |------------- |----------------:|-----------------:|---------------:|----------:|---------:|---------:|---------:|---------:|-----------:|------------:| +| MyCSharp | Basic | Chrome Win10 | 871.85 ns | 132.008 ns | 7.236 ns | 1.00 | 0.01 | 0.0029 | - | - | 48 B | 1.00 | +| UAParser | Basic | Chrome Win10 | 8,901,909.90 ns | 3,411,259.484 ns | 186,982.644 ns | 10,210.80 | 199.60 | 656.2500 | 578.1250 | 109.3750 | 11523310 B | 240,068.96 | +| DeviceDetector.NET | Basic | Chrome Win10 | 5,391,412.50 ns | 8,253,446.769 ns | 452,399.269 ns | 6,184.14 | 451.58 | 296.8750 | 125.0000 | 31.2500 | 5002239 B | 104,213.31 | +| | | | | | | | | | | | | | +| MyCSharp | Basic | Google-Bot | 158.80 ns | 19.584 ns | 1.073 ns | 1.00 | 0.01 | - | - | - | - | NA | +| UAParser | Basic | Google-Bot | 9,666,739.32 ns | 7,566,085.041 ns | 414,722.653 ns | 60,873.62 | 2,289.43 | 671.8750 | 656.2500 | 109.3750 | 11876998 B | NA | +| DeviceDetector.NET | Basic | Google-Bot | 6,106,666.41 ns | 593,634.990 ns | 32,539.137 ns | 38,455.05 | 285.97 | 539.0625 | 117.1875 | 23.4375 | 8817078 B | NA | +| | | | | | | | | | | | | | +| MyCSharp | Cached | Chrome Win10 | 26.43 ns | 0.132 ns | 0.007 ns | 1.00 | 0.00 | - | - | - | - | NA | +| UAParser | Cached | Chrome Win10 | 177,417.99 ns | 24,390.139 ns | 1,336.906 ns | 6,713.66 | 43.84 | 2.1973 | - | - | 37488 B | NA | +| | | | | | | | | | | | | | +| MyCSharp | Cached | Google-Bot | 17.03 ns | 1.835 ns | 0.101 ns | 1.00 | 0.01 | - | - | - | - | NA | +| UAParser | Cached | Google-Bot | 129,445.13 ns | 21,319.059 ns | 1,168.570 ns | 7,599.76 | 70.93 | 2.6855 | - | - | 45857 B | NA | ``` ## Disclaimer diff --git a/src/HttpUserAgentParser/HttpUserAgentParser.cs b/src/HttpUserAgentParser/HttpUserAgentParser.cs index 5e788fd..3e6cf6b 100644 --- a/src/HttpUserAgentParser/HttpUserAgentParser.cs +++ b/src/HttpUserAgentParser/HttpUserAgentParser.cs @@ -11,7 +11,6 @@ namespace MyCSharp.HttpUserAgentParser; /// Parser logic for user agents /// public static class HttpUserAgentParser - { /// /// Parses given user agent @@ -48,7 +47,6 @@ public static HttpUserAgentInformation Parse(string userAgent) /// public static HttpUserAgentPlatformInformation? GetPlatform(string userAgent) { - // Fast, allocation-free token scan (keeps public statics untouched) ReadOnlySpan ua = userAgent.AsSpan(); foreach ((string Token, string Name, HttpUserAgentPlatformType PlatformType) platform in HttpUserAgentStatics.s_platformRules) { @@ -78,6 +76,7 @@ public static bool TryGetPlatform(string userAgent, [NotNullWhen(true)] out Http public static (string Name, string? Version)? GetBrowser(string userAgent) { ReadOnlySpan ua = userAgent.AsSpan(); + foreach ((string Name, string DetectToken, string? VersionToken) browserRule in HttpUserAgentStatics.s_browserRules) { if (!TryIndexOf(ua, browserRule.DetectToken, out int detectIndex)) @@ -86,7 +85,18 @@ public static (string Name, string? Version)? GetBrowser(string userAgent) } // Version token may differ (e.g., Safari uses "Version/") - int versionSearchStart = detectIndex; + + int versionSearchStart; + // For rules without a specific version token, ensure pattern Token/ + if (string.IsNullOrEmpty(browserRule.VersionToken)) + { + int afterDetect = detectIndex + browserRule.DetectToken.Length; + if (afterDetect >= ua.Length || ua[afterDetect] != '/') + { + // Likely a misspelling or partial token (e.g., Edgg, Oprea, Chromee) + continue; + } + } if (!string.IsNullOrEmpty(browserRule.VersionToken)) { if (TryIndexOf(ua, browserRule.VersionToken!, out int vtIndex)) @@ -104,14 +114,14 @@ public static (string Name, string? Version)? GetBrowser(string userAgent) versionSearchStart = detectIndex + browserRule.DetectToken.Length; } - string? version = null; - ua = ua.Slice(versionSearchStart); - if (TryExtractVersion(ua, out Range range)) + ReadOnlySpan search = ua.Slice(versionSearchStart); + if (TryExtractVersion(search, out Range range)) { - version = ua[range].ToString(); + string? version = search[range].ToString(); + return (browserRule.Name, version); } - return (browserRule.Name, version); + // If we didn't find a version for this rule, try next rule } return null; @@ -198,39 +208,43 @@ private static bool TryExtractVersion(ReadOnlySpan haystack, out Range ran // Limit search window to avoid scanning entire UA string unnecessarily const int Window = 128; - if (haystack.Length >= Window) + if (haystack.Length > Window) { haystack = haystack.Slice(0, Window); } - int i = 0; - for (; i < haystack.Length; ++i) + // Find first digit + int start = -1; + for (int i = 0; i < haystack.Length; i++) { char c = haystack[i]; - if (char.IsBetween(c, '0', '9')) + if (c >= '0' && c <= '9') { + start = i; break; } } - int s = i; - haystack = haystack.Slice(i + 1); - for (i = 0; i < haystack.Length; ++i) + if (start < 0) { - char c = haystack[i]; - if (!(char.IsBetween(c, '0', '9') || c == '.')) - { - break; - } + // No digit found => no version + return false; } - i += s + 1; // shift back the previous domain - if (i == s) + // Consume digits and dots after first digit + int end = start + 1; + while (end < haystack.Length) { - return false; + char c = haystack[end]; + if (!((c >= '0' && c <= '9') || c == '.')) + { + break; + } + end++; } - range = new Range(s, i); + // Create exclusive end range + range = new Range(start, end); return true; } } diff --git a/src/HttpUserAgentParser/HttpUserAgentStatics.cs b/src/HttpUserAgentParser/HttpUserAgentStatics.cs index 3eca3f0..996abab 100644 --- a/src/HttpUserAgentParser/HttpUserAgentStatics.cs +++ b/src/HttpUserAgentParser/HttpUserAgentStatics.cs @@ -187,6 +187,7 @@ internal static readonly (string Name, string DetectToken, string? VersionToken) ("Opera", "OPR", null), ("Flock", "Flock", null), ("Edge", "Edge", null), + ("Edge", "EdgiOS", null), ("Edge", "EdgA", null), ("Edge", "Edg", null), ("Vivaldi", "Vivaldi", null), @@ -208,7 +209,6 @@ internal static readonly (string Name, string DetectToken, string? VersionToken) ("Netscape", "Netscape", null), ("OmniWeb", "OmniWeb", null), ("Safari", "Version/", "Version/"), - ("Mozilla", "Mozilla", null), ("Konqueror", "Konqueror", null), ("iCab", "icab", null), ("Lynx", "Lynx", null), diff --git a/tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs b/tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs index 1d4a92c..b778535 100644 --- a/tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs +++ b/tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs @@ -173,4 +173,54 @@ public void BotTests(string ua, string name) Assert.False(uaInfo.IsMobile()); Assert.True(uaInfo.IsRobot()); } + + [Theory] + [InlineData("")] + [InlineData("???")] + [InlineData("NotAUserAgent")] + [InlineData("Mozilla")] + [InlineData("Mozilla/")] + [InlineData("()")] + [InlineData("UserAgent/")] + [InlineData("Bot/123 (")] + [InlineData("123456")] + [InlineData("curl")] + [InlineData("invalid/useragent")] + [InlineData("Mozilla (Windows)")] + [InlineData("Chrome/ABC")] + [InlineData(";;!!##")] + [InlineData("Safari/ ")] + [InlineData("Opera( )")] + [InlineData("Mozilla/5.0 (X11; ) Gecko")] + [InlineData("FakeUA/1.0 (Test)???")] + [InlineData("Mozilla/ (iPhone; U; CPU iPhone OS like Mac OS X) AppleWebKit/ (KHTML, like Gecko) Version/ Mobile/ Safari/")] + [InlineData("Mozzila/5.0 (Windows NT 10.0; Win64; x64)")] + [InlineData("Chorme/91.0.4472.124 (Windows NT 10.0; Win64; x64)")] + [InlineData("FireFoxx/89.0 (Macintosh; Intel Mac OS X 10_15_7)")] + [InlineData("Safarii/14.1 (iPhone; CPU iPhone OS 14_6 like Mac OS X)")] + [InlineData("InternetExploder/11.0 (Windows NT 6.1; WOW64)")] + [InlineData("Bravee/1.25.72 (Windows NT 10.0; Win64; x64)")] + [InlineData("Mozzila/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0)")] + [InlineData("Chromee/99.0.4758.102 (X11; Linux x86_64)")] + [InlineData("FirreFox/100.0 (Windows NT 10.0; rv:100.0)")] + [InlineData("Saffari/605.1.15 (iPad; CPU OS 14_6 like Mac OS X)")] + [InlineData("Edgg/103.0.1264.37 (Macintosh; Intel Mac OS X 11_5_2)")] + [InlineData("Chorome/91.0.4472.124 (Linux; Android 10; SM-G973F)")] + [InlineData("Edgee/18.18363 (Windows 10 1909; Win64; x64)")] + public void InvalidUserAgent(string userAgent) + { + HttpUserAgentInformation info = HttpUserAgentInformation.Parse(userAgent); + + // Invalid or malformed UAs must be classified as Unknown + Assert.Equal(HttpUserAgentType.Unknown, info.Type); + Assert.Null(info.Name); + Assert.Null(info.Version); + + // Parser trims input via Cleanup, so compare to trimmed UA + Assert.Equal(userAgent.Trim(), info.UserAgent); + + // Should not be considered a browser or a robot + Assert.False(info.IsBrowser()); + Assert.False(info.IsRobot()); + } }