Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
34eba54
Regex automata optimizations
ieviev May 24, 2024
49607f4
off by one err
ieviev May 24, 2024
5ac29f3
wip reversal optimizations
ieviev May 26, 2024
e440dec
removing unnecessary overhead
ieviev May 26, 2024
627fd90
handle final position correctly
ieviev May 26, 2024
7ae6440
edge case workarounds, tests should be ok again
ieviev May 27, 2024
383f3e5
optimizing lookup initialization
ieviev May 27, 2024
5a2636c
more dfa overhead removed
ieviev May 28, 2024
57e5b8d
removed potential rewrite
ieviev May 28, 2024
4d275db
low memory variant
ieviev May 28, 2024
c35ed7e
some kind of compromise between speed and memory
ieviev May 28, 2024
868e02d
cheaper nullability checks
ieviev May 29, 2024
14afd18
nullability encoding
ieviev May 29, 2024
5f5ab55
nullability cached as bytes
ieviev May 29, 2024
dd121de
reverting some changes
ieviev May 30, 2024
723c5b6
testing nfa fallback
ieviev Jun 5, 2024
6bf4095
refactoring, work in progress
ieviev Jun 17, 2024
b10e600
refactoring to struct interfaces
ieviev Jun 18, 2024
d68bd3c
refactoring optimizations
ieviev Jun 18, 2024
153dfc3
fallback mode and bugfix
ieviev Jun 18, 2024
4aebe3e
reenable warnings
ieviev Jun 18, 2024
1e6f55c
anchor edge case
ieviev Jun 19, 2024
c6ad3ac
anchor edge cases
ieviev Jun 19, 2024
e10b43f
Apply suggestions from code review
ieviev Jun 19, 2024
f581755
Apply suggestions from code review
ieviev Jun 27, 2024
01a9684
rebased branch and some cleanup
ieviev Jun 27, 2024
341ce27
cleanup, removing unused features
ieviev Jun 27, 2024
1a28c69
cleanup
ieviev Jun 27, 2024
9bba84f
timeout limit changes
ieviev Jun 29, 2024
a957781
lookup allocation threshold and timeout limits
ieviev Jun 30, 2024
7e86855
char mapping
ieviev Jun 30, 2024
99b5717
empty array mapping
ieviev Jun 30, 2024
47c6b04
adding timeout check to create-derivative
ieviev Jun 30, 2024
22d23fa
some cleanup
ieviev Jun 30, 2024
761f897
comments and cleanup
ieviev Jun 30, 2024
53924eb
cleanup and comments
ieviev Jun 30, 2024
e66d3d3
reflecting new limits in tests
ieviev Jul 1, 2024
65c0b8b
rerunning tests
ieviev Jul 1, 2024
de085b4
retesting DFA timeout
ieviev Jul 1, 2024
5ef3b32
more precise regex memory limit for DFA mode
ieviev Jul 2, 2024
281446f
reverting change
ieviev Jul 2, 2024
8f78046
reverting reversal refactor
ieviev Jul 3, 2024
7157520
Apply suggestions from code review
ieviev Jul 3, 2024
931552d
variable naming
ieviev Jul 3, 2024
cc493f1
test for over 255 minterms
ieviev Jul 3, 2024
a0d2390
adding net directive around test
ieviev Jul 3, 2024
0691c58
all engines in minterms test
ieviev Jul 3, 2024
8ceb207
Apply suggestions from code review
ieviev Jul 3, 2024
379519b
Apply suggestions from code review
ieviev Jul 3, 2024
57c8f6d
simplifying code
ieviev Jul 3, 2024
2e57d42
state flag values down
ieviev Jul 3, 2024
60b1352
mintermclassifier changes
ieviev Jul 3, 2024
2900aad
reversal
ieviev Jul 4, 2024
764ded8
getstateflags
ieviev Jul 4, 2024
81d0dca
formatting
ieviev Jul 4, 2024
38f28b9
removing unused interface
ieviev Jul 4, 2024
cce1188
local function typo
ieviev Jul 4, 2024
8b946da
temporarily removing minterms test
ieviev Jul 5, 2024
d3430b3
re-adding minterms test
ieviev Jul 6, 2024
388c256
reenabling test for all engines
ieviev Jul 8, 2024
2704641
test bugfix
ieviev Jul 8, 2024
0abaabe
expected matches change
ieviev Jul 8, 2024
0a0f409
Review and clean up some code
stephentoub Jul 10, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
wip reversal optimizations
  • Loading branch information
ieviev authored and stephentoub committed Jul 10, 2024
commit 5ac29f36906e6afff5b3b1835d7155ff524e4bf6
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DefineConstants>$(DefineConstants);SYSTEM_TEXT_REGULAREXPRESSIONS</DefineConstants>
<UseCompilerGeneratedDocXmlFile>false</UseCompilerGeneratedDocXmlFile>
<!-- <NoWarn>IL2026;IL2075;IDE0059;CA1823</NoWarn>-->

</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ internal sealed partial class SymbolicRegexMatcher<TSet>
/// </summary>
private bool[] _canBeAcceleratedArray;

#if DEBUG
// private readonly Action<string> _wout = st =>
// {
// var a_cons = System.Reflection.Assembly.Load("System.Console");
// var t_cons = a_cons.GetType("System.Console")!;
// var wl = t_cons.GetMethod("WriteLine", [typeof(string)]);
// wl!.Invoke(null, [st]);
// };
#endif
/// <summary>
/// The transition function for DFA mode.
/// Each state has a range of consecutive entries for each minterm ID. A range of size 2^L, where L is
Expand Down Expand Up @@ -162,6 +171,69 @@ private MatchingState<TSet> GetOrCreateState(SymbolicRegexNode<TSet> node, uint
return GetOrCreateState_NoLock(node, prevCharKind);
}

/// <summary>
/// Optimized reversal state computation which skips the leading fixed-length parts of the given
/// (already reversed) pattern.
/// </summary>
/// <param name="node">The reversed pattern whose fixed-length prefix should be skipped.</param>
/// <returns>
/// A tuple of the number of input positions accounted for by the skipped prefix, and the state created
/// for the remaining pattern wrapped in a disable-backtracking-simulation node.
/// </returns>
private (int, MatchingState<TSet>) CreateOptimizedReversal(SymbolicRegexNode<TSet> node)
{
    // Number of positions covered by the prefix that has been stripped so far.
    int pos = 0;
    SymbolicRegexNode<TSet> current = node;

    // Keep peeling prefix elements off the front of the concatenation until one cannot be skipped.
    while (TryStripPrefix(current, out SymbolicRegexNode<TSet> next))
    {
        current = next;
    }

    return (pos, GetOrCreateState_NoLock(_builder.CreateDisableBacktrackingSimulation(current), 0, false));

    // Attempts to remove the left-most element of a concatenation. On success, 'rest' is the remaining
    // pattern and 'pos' has been advanced by the number of positions the removed element consumes.
    // On failure, 'rest' is the unchanged input.
    bool TryStripPrefix(SymbolicRegexNode<TSet> concat, out SymbolicRegexNode<TSet> rest)
    {
        rest = concat;
        if (concat is not { _kind: SymbolicRegexNodeKind.Concat, _left: { } left })
        {
            return false;
        }

        switch (left._kind)
        {
            // These are dropped without advancing the position.
            case SymbolicRegexNodeKind.CaptureEnd:
            case SymbolicRegexNodeKind.BoundaryAnchor:
                rest = concat._right!;
                return true;

            // A singleton accounts for exactly one position.
            case SymbolicRegexNodeKind.Singleton:
                pos += 1;
                rest = concat._right!;
                return true;

            // Only loops over a singleton have a known per-iteration length.
            case SymbolicRegexNodeKind.Loop when left._left is { _kind: SymbolicRegexNodeKind.Singleton } body:
                if (left._lower == left._upper)
                {
                    // Fixed-count loop: skip it entirely.
                    pos += left._lower;
                    rest = concat._right!;
                    return true;
                }

                if (left._lower > 0)
                {
                    // Skip the mandatory iterations and keep a loop for the optional remainder.
                    // An unbounded loop (upper == int.MaxValue) must stay unbounded rather than
                    // becoming a bounded loop of int.MaxValue - lower iterations.
                    int remainingUpper = left._upper == int.MaxValue ? int.MaxValue : left._upper - left._lower;
                    SymbolicRegexNode<TSet> remainder = _builder.CreateLoop(body, left.IsLazy, 0, remainingUpper);
                    rest = _builder.CreateConcat(remainder, concat._right!);
                    pos += left._lower;
                    return true;
                }

                return false;

            default:
                return false;
        }
    }
}

/// <summary>
/// Create a state with given node and previous character context.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ internal sealed partial class SymbolicRegexMatcher<TSet> : SymbolicRegexMatcher
/// <remarks>If the pattern doesn't contain any anchors, there will only be a single initial state.</remarks>
private readonly MatchingState<TSet>[] _reverseInitialStates;

/// <summary>
/// Optimized starting point for the reverse pass, computed in the constructor by
/// <see cref="CreateOptimizedReversal"/> from the reversed pattern.
/// Item1 is the number of input positions to step back before starting the reverse walk and
/// Item2 is the state to start that walk from. It is only used when Item1 is greater than zero.
/// </summary>
private readonly (int, MatchingState<TSet>) _optimizedReversalState;

/// <summary>Partition of the input space of sets.</summary>
private readonly TSet[] _minterms;

Expand Down Expand Up @@ -172,6 +174,7 @@ private SymbolicRegexMatcher(SymbolicRegexBuilder<TSet> builder, SymbolicRegexNo
((BitVectorSolver)(object)builder._solver)._classifier;
_capsize = captureCount;


// Initialization for fields in SymbolicRegexMatcher.Automata.cs
_stateArray = new MatchingState<TSet>[InitialDfaStateCapacity];
_stateFlagsArray = new StateFlags[InitialDfaStateCapacity];
Expand Down Expand Up @@ -262,6 +265,9 @@ private SymbolicRegexMatcher(SymbolicRegexBuilder<TSet> builder, SymbolicRegexNo
}
_reverseInitialStates = reverseInitialStates;

// Create optimized reversal
_optimizedReversalState = CreateOptimizedReversal(_pattern.Reverse(builder));

// Maps a minterm ID to a character kind
uint CalculateMintermIdKind(int mintermId)
{
Expand Down Expand Up @@ -776,9 +782,29 @@ private int FindStartPosition<TInputReader, TNullabilityHandler>(ReadOnlySpan<ch

// Get the starting state for the reverse pattern. This depends on previous character (which, because we're
// going backwards, is character number i).
var currentState = new CurrentState(_reverseInitialStates[GetCharKind<TInputReader>(input, i)]);

CurrentState currentState;
int lastStart = -1; // invalid sentinel value
// if possible use optimized reversal instead
if (_optimizedReversalState.Item1 > 0)
{
i -= _optimizedReversalState.Item1;
currentState = new CurrentState(_optimizedReversalState.Item2);
// anchor variant may need context to be computed if nullable
if (_pattern._info.ContainsSomeAnchor && _canBeNullableArray[currentState.DfaStateId])
{
int positionId = TInputReader.GetPositionId(this, input, i);
if (TNullabilityHandler.IsNullableAt<DfaStateHandler>(this,
in currentState, positionId,
DfaStateHandler.GetStateFlags(this, in currentState)))
{
lastStart = i;
}
}
}
else
{
currentState = new CurrentState(_reverseInitialStates[GetCharKind<TInputReader>(input, i)]);
}

// Walk backwards to the furthest accepting state of the reverse pattern but no earlier than matchStartBoundary.
while (true)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using Xunit;

namespace System.Text.RegularExpressions.Tests
{
/// <summary>
/// Placeholder test class; it currently declares no tests.
/// TODO: add tests here.
/// </summary>
public static partial class NonBacktrackingTests
{

// No tests are defined yet. An example [Fact] stub is kept below, commented out:
// [Fact]
// public static void Test()
// {
// }

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<Compile Include="GroupCollectionTests.cs" />
<Compile Include="MatchCollectionTests.cs" />
<Compile Include="MonoRegexTests.cs" />
<Compile Include="NonBacktrackingTests.cs" />
<Compile Include="Regex.CompileToAssembly.Tests.cs" />
<Compile Include="Regex.Ctor.Tests.cs" />
<Compile Include="Regex.Cache.Tests.cs" />
Expand Down