diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index 31c5fe39d77240..2f855fb2044cfe 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -10,6 +10,7 @@ using System.Globalization; using System.IO; using System.Linq; +using System.Security.Cryptography; using System.Threading; using Microsoft.CodeAnalysis; using Microsoft.CodeAnalysis.CSharp; @@ -256,7 +257,7 @@ void ExitCheckOverflow() writer.WriteLine($" {{"); writer.Indent += 3; EnterCheckOverflow(); - EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers); + EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers, checkOverflow); ExitCheckOverflow(); writer.Indent -= 3; writer.WriteLine($" }}"); @@ -405,25 +406,43 @@ private static string EmitIndexOfAnyValues(char[] asciiChars, Dictionary "s_asciiControl", + "000000000000FF030000000000000000" => "s_asciiDigits", "0000000000000000FEFFFF07FEFFFF07" => "s_asciiLetters", "000000000000FF03FEFFFF07FEFFFF07" => "s_asciiLettersAndDigits", "000000000000FF037E0000007E000000" => "s_asciiHexDigits", "000000000000FF03000000007E000000" => "s_asciiHexDigitsLower", "000000000000FF037E00000000000000" => "s_asciiHexDigitsUpper", + "00000000EEF7008C010000B800000028" => "s_asciiPunctuation", + "00000000010000000000000000000000" => "s_asciiSeparators", + "00000000100800700000004001000050" => "s_asciiSymbols", + "003E0000010000000000000000000000" => "s_asciiWhiteSpace", + "000000000000FF03FEFFFF87FEFFFF07" => "s_asciiWordChars", + + "00000000FFFFFFFFFFFFFFFFFFFFFF7F" => "s_asciiExceptControl", + "FFFFFFFFFFFF00FCFFFFFFFFFFFFFFFF" => "s_asciiExceptDigits", + "FFFFFFFFFFFFFFFF010000F8010000F8" => "s_asciiExceptLetters", + "FFFFFFFFFFFF00FC010000F8010000F8" => "s_asciiExceptLettersAndDigits", + "FFFFFFFFFFFFFFFFFFFFFFFF010000F8" => "s_asciiExceptLower", + "FFFFFFFF1108FF73FEFFFF47FFFFFFD7" => "s_asciiExceptPunctuation", + "FFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFF" => "s_asciiExceptSeparators", + "FFFFFFFFEFF7FF8FFFFFFFBFFEFFFFAF" => "s_asciiExceptSymbols", + "FFFFFFFFFFFFFFFF010000F8FFFFFFFF" => "s_asciiExceptUpper", + "FFC1FFFFFEFFFFFFFFFFFFFFFFFFFFFF" => "s_asciiExceptWhiteSpace", + "FFFFFFFFFFFF00FC01000078010000F8" => "s_asciiExceptWordChars", + _ => $"s_ascii_{hexBitmap.TrimStart('0')}" }; - string helperName = $"IndexOfAnyValues_{fieldName}"; - - if (!requiredHelpers.ContainsKey(helperName)) + if (!requiredHelpers.ContainsKey(fieldName)) { Array.Sort(asciiChars); string setLiteral = Literal(new string(asciiChars)); - requiredHelpers.Add(helperName, new string[] + requiredHelpers.Add(fieldName, new string[] { - $"/// Cached data to efficiently search for a character in the set {EscapeXmlComment(setLiteral)}.", + $"/// Supports searching for characters in or not in {EscapeXmlComment(setLiteral)}.", $"internal static readonly IndexOfAnyValues {fieldName} = IndexOfAnyValues.Create({setLiteral});", }); } @@ -431,6 +450,118 @@ private static string EmitIndexOfAnyValues(char[] asciiChars, Dictionary requiredHelpers, bool checkOverflow) + { + // In order to optimize the search for ASCII characters, we use IndexOfAnyValues to vectorize a search + // for those characters plus anything non-ASCII (if we find something non-ASCII, we'll fall back to + // a sequential walk). In order to do that search, we actually build up a set for all of the ASCII + // characters _not_ contained in the set, and then do a search for the inverse of that, which will be + // all of the target ASCII characters and all of non-ASCII. + var asciiChars = new List(); + for (int i = 0; i <= 0x7f; i++) + { + if (!RegexCharClass.CharInClass((char)i, set)) + { + asciiChars.Add((char)i); + } + } + + string? helperName = set switch + { + RegexCharClass.DigitClass => "IndexOfAnyDigit", + RegexCharClass.ControlClass => "IndexOfAnyControl", + RegexCharClass.LetterClass => "IndexOfAnyLetter", + RegexCharClass.LetterOrDigitClass => "IndexOfAnyLetterOrDigit", + RegexCharClass.LowerClass => "IndexOfAnyLower", + RegexCharClass.NumberClass => "IndexOfAnyNumber", + RegexCharClass.PunctuationClass => "IndexOfAnyPunctuation", + RegexCharClass.SeparatorClass => "IndexOfAnySeparator", + RegexCharClass.SpaceClass => "IndexOfAnyWhiteSpace", + RegexCharClass.SymbolClass => "IndexOfAnySymbol", + RegexCharClass.UpperClass => "IndexOfAnyUpper", + RegexCharClass.WordClass => "IndexOfAnyWordChar", + + RegexCharClass.NotDigitClass => "IndexOfAnyExceptDigit", + RegexCharClass.NotControlClass => "IndexOfAnyExceptControl", + RegexCharClass.NotLetterClass => "IndexOfAnyExceptLetter", + RegexCharClass.NotLetterOrDigitClass => "IndexOfAnyExceptLetterOrDigit", + RegexCharClass.NotLowerClass => "IndexOfAnyExceptLower", + RegexCharClass.NotNumberClass => "IndexOfAnyExceptNumber", + RegexCharClass.NotPunctuationClass => "IndexOfAnyExceptPunctuation", + RegexCharClass.NotSeparatorClass => "IndexOfAnyExceptSeparator", + RegexCharClass.NotSpaceClass => "IndexOfAnyExceptWhiteSpace", + RegexCharClass.NotSymbolClass => "IndexOfAnyExceptSymbol", + RegexCharClass.NotUpperClass => "IndexOfAnyExceptUpper", + RegexCharClass.NotWordClass => "IndexOfAnyExceptWordChar", + + _ => null, + }; + if (helperName is null) + { + using (SHA256 sha = SHA256.Create()) + { +#pragma warning disable CA1850 // SHA256.HashData isn't available on netstandard2.0 + helperName = $"IndexOfNonAsciiOrAny_{BitConverter.ToString(sha.ComputeHash(Encoding.UTF8.GetBytes(set))).Replace("-", "")}"; +#pragma warning restore CA1850 + } + } + + if (!requiredHelpers.ContainsKey(helperName)) + { + var additionalDeclarations = new HashSet(); + string matchExpr = MatchCharacterClass("span[i]", set, negate: false, additionalDeclarations, requiredHelpers); + + var lines = new List(); + lines.Add($"/// Finds the next index of any character that matches {EscapeXmlComment(DescribeSet(set))}."); + lines.Add($"[MethodImpl(MethodImplOptions.AggressiveInlining)]"); + lines.Add($"internal static int {helperName}(this ReadOnlySpan span)"); + lines.Add($"{{"); + int uncheckedStart = lines.Count; + lines.Add(asciiChars.Count == 128 ? + $" int i = span.IndexOfAnyExceptInRange('\0', '\u007f');" : + $" int i = span.IndexOfAnyExcept({EmitIndexOfAnyValues(asciiChars.ToArray(), requiredHelpers)});"); + lines.Add($" if ((uint)i < (uint)span.Length)"); + lines.Add($" {{"); + lines.Add($" if (span[i] <= 0x7f)"); + lines.Add($" {{"); + lines.Add($" return i;"); + lines.Add($" }}"); + lines.Add($""); + if (additionalDeclarations.Count > 0) + { + lines.AddRange(additionalDeclarations.Select(s => $" {s}")); + } + lines.Add($" do"); + lines.Add($" {{"); + lines.Add($" if ({matchExpr})"); + lines.Add($" {{"); + lines.Add($" return i;"); + lines.Add($" }}"); + lines.Add($" i++;"); + lines.Add($" }}"); + lines.Add($" while ((uint)i < (uint)span.Length);"); + lines.Add($" }}"); + lines.Add($""); + lines.Add($" return -1;"); + lines.Add($"}}"); + + if (checkOverflow) + { + lines.Insert(uncheckedStart, " unchecked"); + lines.Insert(uncheckedStart + 1, " {"); + for (int i = uncheckedStart + 2; i < lines.Count - 1; i++) + { + lines[i] = $" {lines[i]}"; + } + lines.Insert(lines.Count - 1, " }"); + } + + requiredHelpers.Add(helperName, lines.ToArray()); + } + + return helperName; + } + /// Emits the body of the Scan method override. private static (bool NeedsTryFind, bool NeedsTryMatch) EmitScan(IndentedTextWriter writer, RegexMethod rm) { @@ -518,7 +649,7 @@ FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start or } /// Emits the body of the TryFindNextPossibleStartingPosition. - private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers) + private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers, bool checkOverflow) { RegexOptions options = rm.Options; RegexTree regexTree = rm.Tree; @@ -871,14 +1002,11 @@ void EmitFixedSet_LeftToRight() $"// The pattern matches {DescribeSet(primarySet.Set)} at index {primarySet.Distance}."); writer.WriteLine($"// Find the next occurrence. If it can't be found, there's no match."); - // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix. - // We can use it if this is a case-sensitive class with a small number of characters in the class. - // We avoid using it for the relatively common case of the starting set being '.', aka anything other than + // Use IndexOf{Any} to accelerate the skip loop via vectorization to match the first prefix. + // But we avoid using it for the relatively common case of the starting set being '.', aka anything other than // a newline, as it's very rare to have long, uninterrupted sequences of newlines. int setIndex = 0; - bool canUseIndexOf = - primarySet.Set != RegexCharClass.NotNewLineClass && - (primarySet.Chars is not null || primarySet.Range is not null || primarySet.AsciiSet is not null); + bool canUseIndexOf = primarySet.Set != RegexCharClass.NotNewLineClass; bool needLoop = !canUseIndexOf || setsToUse > 1; FinishEmitBlock loopBlock = default; @@ -914,13 +1042,14 @@ void EmitFixedSet_LeftToRight() _ => $"{span}.IndexOfAny({EmitIndexOfAnyValuesOrLiteral(primarySet.Chars, requiredHelpers)})", } : primarySet.AsciiSet is not null ? $"{span}.IndexOfAny({EmitIndexOfAnyValues(primarySet.AsciiSet, requiredHelpers)})" : - (primarySet.Range!.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Negated) switch + primarySet.Range is not null ? (primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive, primarySet.Negated) switch { (false, false) => $"{span}.IndexOfAnyInRange({Literal(primarySet.Range.Value.LowInclusive)}, {Literal(primarySet.Range.Value.HighInclusive)})", (true, false) => $"{span}.IndexOf({Literal(primarySet.Range.Value.LowInclusive)})", (false, true) => $"{span}.IndexOfAnyExceptInRange({Literal(primarySet.Range.Value.LowInclusive)}, {Literal(primarySet.Range.Value.HighInclusive)})", (true, true) => $"{span}.IndexOfAnyExcept({Literal(primarySet.Range.Value.LowInclusive)})", - }; + } : + $"{span}.{EmitIndexOfAnyCustomHelper(primarySet.Set, requiredHelpers, checkOverflow)}()"; if (needLoop) { @@ -983,7 +1112,7 @@ void EmitFixedSet_LeftToRight() for (; setIndex < setsToUse; setIndex++) { string spanIndex = $"span[i{(sets[setIndex].Distance > 0 ? $" + {sets[setIndex].Distance}" : "")}]"; - string charInClassExpr = MatchCharacterClass(options, spanIndex, sets[setIndex].Set, negate: false, additionalDeclarations, requiredHelpers); + string charInClassExpr = MatchCharacterClass(spanIndex, sets[setIndex].Set, negate: false, additionalDeclarations, requiredHelpers); if (setIndex == start) { @@ -1034,7 +1163,7 @@ void EmitFixedSet_RightToLeft() { using (EmitBlock(writer, "while ((uint)--pos < (uint)inputSpan.Length)")) { - using (EmitBlock(writer, $"if ({MatchCharacterClass(options, "inputSpan[pos]", set.Set, negate: false, additionalDeclarations, requiredHelpers)})")) + using (EmitBlock(writer, $"if ({MatchCharacterClass("inputSpan[pos]", set.Set, negate: false, additionalDeclarations, requiredHelpers)})")) { writer.WriteLine("base.runtextpos = pos + 1;"); writer.WriteLine("return true;"); @@ -1100,7 +1229,7 @@ void EmitLiteralAfterAtomicLoop() // We found the literal. Walk backwards from it finding as many matches as we can against the loop. writer.WriteLine("int prev = i - 1;"); - using (EmitBlock(writer, $"while ((uint)prev < (uint)slice.Length && {MatchCharacterClass(options, "slice[prev]", target.LoopNode.Str!, negate: false, additionalDeclarations, requiredHelpers)})")) + using (EmitBlock(writer, $"while ((uint)prev < (uint)slice.Length && {MatchCharacterClass("slice[prev]", target.LoopNode.Str!, negate: false, additionalDeclarations, requiredHelpers)})")) { writer.WriteLine("prev--;"); } @@ -2729,7 +2858,7 @@ void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset if (node.IsSetFamily) { - expr = MatchCharacterClass(options, expr, node.Str!, negate: true, additionalDeclarations, requiredHelpers); + expr = MatchCharacterClass(expr, node.Str!, negate: true, additionalDeclarations, requiredHelpers); } else { @@ -3746,7 +3875,7 @@ void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired = string expr = $"inputSpan[pos - {iterationLocal} - 1]"; if (node.IsSetFamily) { - expr = MatchCharacterClass(options, expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers); + expr = MatchCharacterClass(expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers); } else { @@ -3795,7 +3924,7 @@ void EmitSingleCharAtomicLoop(RegexNode node, bool emitLengthChecksIfRequired = string expr = $"{sliceSpan}[{iterationLocal}]"; expr = node.IsSetFamily ? - MatchCharacterClass(options, expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers) : + MatchCharacterClass(expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers) : $"{expr} {(node.IsOneFamily ? "==" : "!=")} {Literal(node.Ch)}"; if (minIterations != 0 || maxIterations != int.MaxValue) @@ -3859,7 +3988,7 @@ void EmitAtomicSingleCharZeroOrOne(RegexNode node) if (node.IsSetFamily) { - expr = MatchCharacterClass(options, expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers); + expr = MatchCharacterClass(expr, node.Str!, negate: false, additionalDeclarations, requiredHelpers); } else { @@ -4512,7 +4641,7 @@ private static bool TryEmitIndexOf( return false; } - private static string MatchCharacterClass(RegexOptions options, string chExpr, string charClass, bool negate, HashSet additionalDeclarations, Dictionary requiredHelpers) + private static string MatchCharacterClass(string chExpr, string charClass, bool negate, HashSet additionalDeclarations, Dictionary requiredHelpers) { // We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass), // but that call is relatively expensive. Before we fall back to it, we try to optimize @@ -4781,6 +4910,19 @@ private static string MatchCharacterClass(RegexOptions options, string chExpr, s $"({range0Clause} | {range1Clause})"; } + const string Base = nameof(Base); + if (!requiredHelpers.ContainsKey(Base)) + { + requiredHelpers.Add(Base, new string[] + { + $"internal class {Base} : RegexRunner", + $"{{", + $" /// Determines whether the specified character in is in the specified character class.", + $" internal static new bool CharInClass(char ch, string charClass) => RegexRunner.CharInClass(ch, charClass);", + $"}}", + }); + } + if (analysis.ContainsNoAscii) { // We determined that the character class contains only non-ASCII, @@ -4877,20 +5019,20 @@ private static string MatchCharacterClass(RegexOptions options, string chExpr, s _ => $"({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) {(negate ? "=" : "!")}= 0", }; - return $"((ch = {chExpr}) < 128 ? {asciiExpr} : {(negate ? "!" : "")}RegexRunner.CharInClass((char)ch, {Literal(charClass)}))"; + return $"((ch = {chExpr}) < 128 ? {asciiExpr} : {(negate ? "!" : "")}{HelpersTypeName}.{Base}.CharInClass((char)ch, {Literal(charClass)}))"; string EmitContainsNoAscii() { return negate ? - $"((ch = {chExpr}) < 128 || !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" : - $"((ch = {chExpr}) >= 128 && RegexRunner.CharInClass((char)ch, {Literal(charClass)}))"; + $"((ch = {chExpr}) < 128 || !{HelpersTypeName}.{Base}.CharInClass((char)ch, {Literal(charClass)}))" : + $"((ch = {chExpr}) >= 128 && {HelpersTypeName}.{Base}.CharInClass((char)ch, {Literal(charClass)}))"; } string EmitAllAsciiContained() { return negate ? - $"((ch = {chExpr}) >= 128 && !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" : - $"((ch = {chExpr}) < 128 || RegexRunner.CharInClass((char)ch, {Literal(charClass)}))"; + $"((ch = {chExpr}) >= 128 && !{HelpersTypeName}.{Base}.CharInClass((char)ch, {Literal(charClass)}))" : + $"((ch = {chExpr}) < 128 || {HelpersTypeName}.{Base}.CharInClass((char)ch, {Literal(charClass)}))"; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index e94567b7ac9cca..d44a7e63b5b723 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -8,6 +8,7 @@ using System.Globalization; using System.Reflection; using System.Reflection.Emit; +using System.Runtime.InteropServices; using System.Threading; namespace System.Text.RegularExpressions @@ -831,14 +832,11 @@ void EmitFixedSet_LeftToRight() Call(s_spanSliceIntMethod); Stloc(textSpanLocal); - // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix. - // We can use it if this is a case-sensitive class with a small number of characters in the class. - // We avoid using it for the relatively common case of the starting set being '.', aka anything other than + // Use IndexOf{Any} to accelerate the skip loop via vectorization to match the first prefix. + // But we avoid using it for the relatively common case of the starting set being '.', aka anything other than // a newline, as it's very rare to have long, uninterrupted sequences of newlines. int setIndex = 0; - bool canUseIndexOf = - primarySet.Set != RegexCharClass.NotNewLineClass && - (primarySet.Chars is not null || primarySet.Range is not null || primarySet.AsciiSet is not null); + bool canUseIndexOf = primarySet.Set != RegexCharClass.NotNewLineClass; bool needLoop = !canUseIndexOf || setsToUse > 1; Label checkSpanLengthLabel = default; @@ -926,9 +924,9 @@ void EmitFixedSet_LeftToRight() LoadIndexOfAnyValues(primarySet.AsciiSet); Call(s_spanIndexOfAnyIndexOfAnyValues); } - else + else if (primarySet.Range is not null) { - if (primarySet.Range!.Value.LowInclusive == primarySet.Range.Value.HighInclusive) + if (primarySet.Range.Value.LowInclusive == primarySet.Range.Value.HighInclusive) { // tmp = ...IndexOf{AnyExcept}(low); Ldc(primarySet.Range.Value.LowInclusive); @@ -942,6 +940,92 @@ void EmitFixedSet_LeftToRight() Call(primarySet.Negated ? s_spanIndexOfAnyExceptInRange : s_spanIndexOfAnyInRange); } } + else + { + // In order to optimize the search for ASCII characters, we use IndexOfAnyValues to vectorize a search + // for those characters plus anything non-ASCII (if we find something non-ASCII, we'll fall back to + // a sequential walk). In order to do that search, we actually build up a set for all of the ASCII + // characters _not_ contained in the set, and then do a search for the inverse of that, which will be + // all of the target ASCII characters and all of non-ASCII. + var asciiChars = new List(); + for (int i = 0; i <= 0x7f; i++) + { + if (!RegexCharClass.CharInClass((char)i, primarySet.Set)) + { + asciiChars.Add((char)i); + } + } + + using (RentedLocalBuilder span = RentReadOnlySpanCharLocal()) + using (RentedLocalBuilder i = RentInt32Local()) + { + // ReadOnlySpan span = inputSpan...; + Stloc(span); + + // int i = span. + Ldloc(span); + if (asciiChars.Count == 128) + { + // IndexOfAnyExceptInRange('\0', '\u007f'); + Ldc(0); + Ldc(127); + Call(s_spanIndexOfAnyExceptInRange); + } + else + { + // IndexOfAnyExcept(indexOfAnyValuesArray[...]); + LoadIndexOfAnyValues(CollectionsMarshal.AsSpan(asciiChars)); + Call(s_spanIndexOfAnyExceptIndexOfAnyValues); + } + Stloc(i); + + // if ((uint)i >= span.Length) goto doneSearch; + Label doneSearch = DefineLabel(); + Ldloc(i); + Ldloca(span); + Call(s_spanGetLengthMethod); + BgeUnFar(doneSearch); + + // if (span[i] <= 0x7f) goto doneSearch; + Ldc(0x7f); + Ldloca(span); + Ldloc(i); + Call(s_spanGetItemMethod); + LdindU2(); + BgeUnFar(doneSearch); + + Label loop = DefineLabel(); + MarkLabel(loop); + // do { ... + + // if (CharInClass(span[i])) goto doneSearch; + Ldloca(span); + Ldloc(i); + Call(s_spanGetItemMethod); + LdindU2(); + EmitMatchCharacterClass(primarySet.Set); + Brtrue(doneSearch); + + // i++; + Ldloc(i); + Ldc(1); + Add(); + Stloc(i); + + // } while ((uint)i < span.Length); + Ldloc(i); + Ldloca(span); + Call(s_spanGetLengthMethod); + BltUnFar(loop); + + // i = -1; + Ldc(-1); + Stloc(i); + + MarkLabel(doneSearch); + Ldloc(i); + } + } if (needLoop) { @@ -6008,7 +6092,7 @@ private void EmitTimeoutCheckIfNeeded() /// /// Adds an entry in for the given and emits a load of that initialized value. /// - private void LoadIndexOfAnyValues(char[] chars) + private void LoadIndexOfAnyValues(ReadOnlySpan chars) { List> list = _indexOfAnyValues ??= new(); int index = list.Count; diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs index 8187caf7682671..4a770a36583df4 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorOutputTests.cs @@ -660,7 +660,7 @@ private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan) // Match a character in the set [^>\s] atomically at least once. { int iteration3 = 0; - while ((uint)iteration3 < (uint)slice.Length && ((ch = slice[iteration3]) < 128 ? ("쇿\uffff\ufffe뿿\uffff\uffff\uffff\uffff"[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, "\u0001\u0002\u0001>?d"))) + while ((uint)iteration3 < (uint)slice.Length && ((ch = slice[iteration3]) < 128 ? ("쇿\uffff\ufffe뿿\uffff\uffff\uffff\uffff"[ch >> 4] & (1 << (ch & 0xF))) != 0 : Utilities.Base.CharInClass((char)ch, "\u0001\u0002\u0001>?d"))) { iteration3++; } @@ -702,6 +702,16 @@ void UncaptureUntil(int capturePosition) } + /// Helper methods used by generated -derived implementations. + [GeneratedCodeAttribute("System.Text.RegularExpressions.Generator", "42.42.42.42")] + file static class Utilities + { + internal class Base : RegexRunner + { + /// Determines whether the specified character in is in the specified character class. + internal static new bool CharInClass(char ch, string charClass) => RegexRunner.CharInClass(ch, charClass); + } + } } """ }; @@ -864,7 +874,7 @@ file static class Utilities /// Whether is non-infinite. internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout; - /// Cached data to efficiently search for a character in the set "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz". + /// Supports searching for characters in or not in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz". internal static readonly IndexOfAnyValues s_asciiLetters = IndexOfAnyValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); } }