Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Extend SearchValues<string> use in Regex to NonBacktracking
We previously only enabled this for the compiler. Support it with NonBacktracking, too.
  • Loading branch information
stephentoub committed Jun 14, 2024
commit 4881bbc9f90c045675a2c156e4422f9ac556edb6
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

Expand Down Expand Up @@ -83,6 +84,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
bool dfa = (options & RegexOptions.NonBacktracking) != 0;
bool compiled = (options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
bool interpreter = !compiled && !dfa;
bool usesRfoTryFind = !compiled;

// For interpreter, we want to employ optimizations, but we don't want to make construction significantly
// more expensive; someone who wants to pay to do more work can specify Compiled. So for the interpreter
Expand Down Expand Up @@ -140,12 +142,18 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
// We're now left-to-right only and looking for multiple prefixes and/or sets.

// If there are multiple leading strings, we can search for any of them.
if (compiled)
if (!interpreter) // this works in the interpreter, but we avoid it due to additional cost during construction
{
if (RegexPrefixAnalyzer.FindPrefixes(root, ignoreCase: true) is { Length: > 1 } caseInsensitivePrefixes)
{
LeadingPrefixes = caseInsensitivePrefixes;
FindMode = FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight;
#if SYSTEM_TEXT_REGULAREXPRESSIONS
if (usesRfoTryFind)
{
LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.OrdinalIgnoreCase);
}
#endif
return;
}

Expand All @@ -156,6 +164,12 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
//{
// LeadingPrefixes = caseSensitivePrefixes;
// FindMode = FindNextStartingPositionMode.LeadingStrings_LeftToRight;
#if SYSTEM_TEXT_REGULAREXPRESSIONS
// if (usesRfoTryFind)
// {
// LeadingStrings = SearchValues.Create(LeadingPrefixes, StringComparison.Ordinal);
// }
#endif
// return;
//}
}
Expand Down Expand Up @@ -275,6 +289,11 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
/// <remarks>The case-insensitivity of the 0th entry will always match the mode selected, but subsequent entries may not.</remarks>
public List<FixedDistanceSet>? FixedDistanceSets { get; }

#if SYSTEM_TEXT_REGULAREXPRESSIONS
/// <summary>When in leading strings mode, gets the search values to use for searching the input.</summary>
/// <remarks>
/// The constructor only creates this when the find optimizations themselves perform the search
/// (i.e. the regex is not compiled); otherwise it is left <see langword="null"/>, even if a
/// leading-strings find mode was selected. The left-to-right search handles a null value by
/// reporting the current position as a candidate rather than searching.
/// </remarks>
public SearchValues<string>? LeadingStrings { get; }
#endif

/// <summary>Data about a character class at a fixed offset from the start of any match to a pattern.</summary>
public struct FixedDistanceSet(char[]? chars, string set, int distance)
{
Expand Down Expand Up @@ -676,6 +695,28 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
return false;
}

// There are multiple possible strings at the beginning. Search for one.
case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
{
if (LeadingStrings is not SearchValues<string> searchValues)
{
// This should be exceedingly rare and only happen if a Compiled regex selected this
// option but then failed to compile (e.g. due to too deep stacks) and fell back to the interpreter.
return true;
}

int i = textSpan.Slice(pos).IndexOfAny(searchValues);
if (i >= 0)
{
pos += i;
return true;
}

pos = textSpan.Length;
return false;
}

// There are one or more sets at fixed offsets from the start of the pattern.

case FindNextStartingPositionMode.FixedDistanceSets_LeftToRight:
Expand Down Expand Up @@ -800,12 +841,6 @@ public bool TryFindNextStartingPositionLeftToRight(ReadOnlySpan<char> textSpan,
return false;
}

// Not supported in the interpreter, but we could end up here for patterns so complex the compiler gave up on them.

case FindNextStartingPositionMode.LeadingStrings_LeftToRight:
case FindNextStartingPositionMode.LeadingStrings_OrdinalIgnoreCase_LeftToRight:
return true;

// Nothing special to look for. Just return true indicating this is a valid position to try to match.

default:
Expand Down