diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MintermClassifier.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MintermClassifier.cs index 24d2a26f849229..bf4735f2bf1459 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MintermClassifier.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MintermClassifier.cs @@ -47,8 +47,11 @@ public MintermClassifier(BDD[] minterms) // in order to size the lookup array to minimize steady-state memory consumption of the potentially // large lookup array. We prefer to use the byte[] _lookup when possible, in order to keep memory // consumption to a minimum; doing so accomodates up to 255 minterms, which is the vast majority case. - // However, when there are more than 255 minterms, we need to use int[] _intLookup. - (uint, uint)[][] charRangesPerMinterm = ArrayPool<(uint, uint)[]>.Shared.Rent(minterms.Length); + // However, when there are more than 255 minterms, we need to use int[] _intLookup. We rent an object[] + // rather than a (uint,uint)[][] to avoid the extra type pressure on the ArrayPool (object[]s are common, + // (uint,uint)[][]s much less so). + object[] arrayPoolArray = ArrayPool.Shared.Rent(minterms.Length); + Span charRangesPerMinterm = arrayPoolArray.AsSpan(0, minterms.Length); int maxChar = -1; for (int mintermId = 1; mintermId < minterms.Length; mintermId++) @@ -70,17 +73,17 @@ public MintermClassifier(BDD[] minterms) } // Return the rented array. We clear it before returning it in order to avoid all the ranges arrays being kept alive. - Array.Clear(charRangesPerMinterm, 0, minterms.Length); - ArrayPool<(uint, uint)[]>.Shared.Return(charRangesPerMinterm); + charRangesPerMinterm.Clear(); + ArrayPool.Shared.Return(arrayPoolArray); - // Creates the lookup array. 
- static T[] CreateLookup(BDD[] minterms, ReadOnlySpan<(uint, uint)[]> charRangesPerMinterm, int _maxChar) where T : IBinaryInteger + // Creates the lookup array. charRangesPerMinterm needs to have already been populated with (uint, uint)[] instances. + static T[] CreateLookup(BDD[] minterms, ReadOnlySpan charRangesPerMinterm, int _maxChar) where T : IBinaryInteger { T[] lookup = new T[_maxChar + 1]; for (int mintermId = 1; mintermId < minterms.Length; mintermId++) { // Each minterm maps to a range of characters. Set each of the characters in those ranges to the corresponding minterm. - foreach ((uint start, uint end) in charRangesPerMinterm[mintermId]) + foreach ((uint start, uint end) in ((uint, uint)[])charRangesPerMinterm[mintermId]) { lookup.AsSpan((int)start, (int)(end + 1 - start)).Fill(T.CreateTruncating(mintermId)); } @@ -101,7 +104,9 @@ public int GetMintermID(int c) } else { - int[] lookup = _intLookup!; + Debug.Assert(_intLookup is not null); + + int[] lookup = _intLookup; return (uint)c < (uint)lookup.Length ? lookup[c] : 0; } } @@ -111,12 +116,6 @@ public int GetMintermID(int c) /// public byte[]? ByteLookup => _lookup; - /// - /// Gets a mapping from char to minterm for the rare case when there are >= 255 minterms. - /// Null in the common case where there are fewer than 255 minterms. - /// - public int[]? 
IntLookup => _intLookup; - /// /// Maximum ordinal character for a non-0 minterm, used to conserve memory /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs index 327f5666f9e2a5..e192bb4b50959b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs @@ -120,8 +120,8 @@ private static void ArrayResizeAndVolatilePublish(ref T[] array, int newSize) /// Pre-computed hot-loop version of nullability check /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private bool IsNullableWithContext(int stateId, int mintermId) => - (_nullabilityArray[stateId] & (1 << (int)GetPositionKind(mintermId))) > 0; + private bool IsNullableWithContext(byte stateNullability, int mintermId) => + (stateNullability & (1 << (int)GetPositionKind(mintermId))) != 0; /// Returns the span from that may contain transitions for the given state private Span GetDeltasFor(MatchingState state) @@ -355,9 +355,7 @@ private int GetCoreStateId(int nfaStateId) /// Gets or creates a new DFA transition. /// This function locks the matcher for safe concurrent use of the - private bool TryCreateNewTransition( - MatchingState sourceState, int mintermId, int offset, bool checkThreshold, [NotNullWhen(true)] out MatchingState? nextState, - long timeoutOccursAt = 0) + private bool TryCreateNewTransition(MatchingState sourceState, int mintermId, int offset, bool checkThreshold, long timeoutOccursAt, [NotNullWhen(true)] out MatchingState? 
nextState) { Debug.Assert(offset < _dfaDelta.Length); lock (this) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs index d30a2d9d02e3f3..f73bf610446a53 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Explore.cs @@ -35,16 +35,22 @@ public override void Explore(bool includeDotStarred, bool includeReverse, bool i { // Don't dequeue yet, because a transition might fail MatchingState state = toExplore.Peek(); + // Include the special minterm for the last end-of-line if the state is sensitive to it int maxMinterm = state.StartsWithLineAnchor ? _minterms!.Length : _minterms!.Length - 1; + // Explore successor states for each minterm for (int mintermId = 0; mintermId <= maxMinterm; ++mintermId) { int offset = DeltaOffset(state.Id, mintermId); - if (!TryCreateNewTransition(state, mintermId, offset, true, out MatchingState? nextState)) + if (!TryCreateNewTransition(state, mintermId, offset, true, 0, out MatchingState? 
nextState)) + { goto DfaLimitReached; + } + EnqueueIfUnseen(nextState, seen, toExplore); } + // Safe to dequeue now that the state has been completely handled toExplore.Dequeue(); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs index 6d9e5baca69a8d..2bcf4217b790eb 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs @@ -71,7 +71,7 @@ public override IEnumerable SampleMatches(int k, int randomseed) NfaMatchingState states = new(); // Here one could also consider previous characters for example for \b, \B, and ^ anchors // and initialize inputSoFar accordingly - states.InitializeFrom(this, _initialStates[GetCharKind([], -1)]); + states.InitializeFrom(this, _initialStates[GetCharKind([], -1)]); CurrentState statesWrapper = new(states); // Used for end suffixes diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 08f423b03344ad..e67a72018dfd57 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -332,10 +332,10 @@ internal TSet GetMintermFromId(int mintermId) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private uint GetCharKind(ReadOnlySpan input, int i) - where TInputReader : struct, IInputReader => !_pattern._info.ContainsSomeAnchor ? 
+ private uint GetCharKind(ReadOnlySpan input, int i) => + !_pattern._info.ContainsSomeAnchor ? CharKind.General : // The previous character kind is irrelevant when anchors are not used. - GetPositionKind(TInputReader.GetPositionId(this, input, i)); + GetPositionKind(DefaultInputReader.GetPositionId(this, input, i)); private void CheckTimeout(long timeoutOccursAt) { @@ -374,23 +374,23 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i // The Z anchor and over 255 minterms are rare enough to consider them separate edge cases. int matchEnd; - if (!_containsEndZAnchor && _mintermClassifier.IntLookup is null) + if (!_containsEndZAnchor && _mintermClassifier.ByteLookup is not null) { // Optimize processing for the common case of no Z anchor and <= 255 minterms. Specialize each call with different generic method arguments. matchEnd = (_findOpts is not null, _containsAnyAnchor) switch { - (true, true) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), - (true, false) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), - (false, false) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), - (false, true) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), + (true, true) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), + (true, false) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), + (false, true) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), + (false, false) => FindEndPositionOptimized(input, startat, timeoutOccursAt, mode, perThreadData), }; } else { // Fallback for Z anchor or over 255 minterms matchEnd = _findOpts is not null ? 
- FindEndPositionFallback(input, startat, timeoutOccursAt, mode, perThreadData) : - FindEndPositionFallback(input, startat, timeoutOccursAt, mode, perThreadData); + FindEndPositionFallback(input, startat, timeoutOccursAt, mode, perThreadData) : + FindEndPositionFallback(input, startat, timeoutOccursAt, mode, perThreadData); } // If there wasn't a match, we're done. @@ -425,7 +425,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i if (_optimizedReversalInfo.Kind is MatchReversalKind.MatchStart) { // No fixed-length knowledge. Start at the end of the match. - reversalStartState = new CurrentState(_reverseInitialStates[GetCharKind(input, matchEnd)]); + reversalStartState = new CurrentState(_reverseInitialStates[GetCharKind(input, matchEnd)]); } else { @@ -436,9 +436,7 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i // reversal may already be nullable here in the case of anchors if (_containsAnyAnchor && _nullabilityArray[reversalStartState.DfaStateId] > 0 && - FullNullabilityHandler.IsNullableAt( - this, in reversalStartState, FullInputReader.GetPositionId(this, input, i), - DfaStateHandler.GetStateFlags(this, in reversalStartState))) + DefaultNullabilityHandler.IsNullableAt(this, in reversalStartState, DefaultInputReader.GetPositionId(this, input, i))) { initialLastStart = i; } @@ -448,10 +446,10 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i { // Call FindStartPosition with generic method arguments based on the presence of anchors. This is purely an optimization; // the (true, true) case is functionally complete whereas the (false, false) case is the most optimized. 
- (true, true) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), - (true, false) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), - (false, true) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), - (false, false) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), + (true, true) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), + (true, false) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), + (false, true) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), + (false, false) => FindStartPosition(reversalStartState, initialLastStart, input, i, startat, perThreadData), }; break; @@ -477,8 +475,8 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i else { Registers endRegisters = _containsAnyAnchor ? - FindSubcaptures(input, matchStart, matchEnd, perThreadData) : - FindSubcaptures(input, matchStart, matchEnd, perThreadData); + FindSubcaptures(input, matchStart, matchEnd, perThreadData) : + FindSubcaptures(input, matchStart, matchEnd, perThreadData); return new SymbolicMatch(matchStart, matchEnd - matchStart, endRegisters.CaptureStarts, endRegisters.CaptureEnds); } } @@ -486,14 +484,13 @@ public SymbolicMatch FindMatch(RegexRunnerMode mode, ReadOnlySpan input, i /// /// Streamlined version of that doesn't handle /z anchors or very large sets of minterms. 
/// - private int FindEndPositionOptimized( + private int FindEndPositionOptimized( ReadOnlySpan input, int pos, long timeoutOccursAt, RegexRunnerMode mode, PerThreadData perThreadData) - where TAcceleratedStateHandler : struct, IAcceleratedStateHandler - where TOptimizedNullabilityHandler : struct, IOptimizedNullabilityHandler + where TInitialStateHandler : struct, IInitialStateHandler + where TOptimizedNullabilityHandler : struct, IDfaNoZAnchorOptimizedNullabilityHandler { - // Initial state candidate. (This is not used in the common DFA caseand could potentially be removed in the future.) - int initialStatePosCandidate = pos; - var currentState = new CurrentState(_dotstarredInitialStates[GetCharKind(input, pos - 1)]); + // Initial state candidate. + var currentState = new CurrentState(_dotstarredInitialStates[GetCharKind(input, pos - 1)]); int endPos = NoMatchExists; int lengthMinus1 = input.Length - 1; @@ -505,7 +502,7 @@ private int FindEndPositionOptimized DfaCharsPerTimeoutCheck ? pos + DfaCharsPerTimeoutCheck : lengthMinus1; - done = FindEndPositionDeltasDFAOptimized( + done = FindEndPositionDeltasDFAOptimized( input, innerLoopLength, mode, timeoutOccursAt, ref pos, ref currentState.DfaStateId, ref endPos); } @@ -514,9 +511,9 @@ private int FindEndPositionOptimized NfaCharsPerTimeoutCheck ? pos + NfaCharsPerTimeoutCheck : input.Length; - done = FindEndPositionDeltasNFA( - input, innerLoopLength, mode, timeoutOccursAt, ref pos, - ref currentState, ref endPos, ref initialStatePosCandidate, ref initialStatePosCandidate); + done = FindEndPositionDeltasNFA( + input, innerLoopLength, mode, ref pos, + ref currentState, ref endPos); } // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or @@ -556,17 +553,13 @@ private int FindEndPositionOptimized /// A one-past-the-end index into input for the preferred match, or first final state position if isMatch is true, or NoMatchExists if no match exists. 
/// - private int FindEndPositionFallback(ReadOnlySpan input, int pos, long timeoutOccursAt, RegexRunnerMode mode, PerThreadData perThreadData) - where TInputReader : struct, IInputReader - where TFindOptimizationsHandler : struct, IInitialStateHandler + private int FindEndPositionFallback(ReadOnlySpan input, int pos, long timeoutOccursAt, RegexRunnerMode mode, PerThreadData perThreadData) + where TInitialStateHandler : struct, IInitialStateHandler where TNullabilityHandler : struct, INullabilityHandler { - int initialStatePosCandidate = pos; - - var currentState = new CurrentState(_dotstarredInitialStates[GetCharKind(input, pos - 1)]); + var currentState = new CurrentState(_dotstarredInitialStates[GetCharKind(input, pos - 1)]); int endPos = NoMatchExists; - int endStateId = -1; while (true) { @@ -583,16 +576,16 @@ private int FindEndPositionFallback DfaCharsPerTimeoutCheck ? pos + DfaCharsPerTimeoutCheck : input.Length; - done = FindEndPositionDeltasDFA( - input, innerLoopLength, mode, timeoutOccursAt, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePosCandidate); + done = FindEndPositionDeltasDFA( + input, innerLoopLength, mode, ref pos, ref currentState, ref endPos); } else { // NFA fallback check, assume \Z and full nullability for NFA since it's already extremely rare to get here. const int NfaCharsPerTimeoutCheck = 1_000; innerLoopLength = _checkTimeout && input.Length - pos > NfaCharsPerTimeoutCheck ? 
pos + NfaCharsPerTimeoutCheck : input.Length; - done = FindEndPositionDeltasNFA( - input, innerLoopLength, mode, timeoutOccursAt, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePosCandidate); + done = FindEndPositionDeltasNFA( + input, innerLoopLength, mode, ref pos, ref currentState, ref endPos); } // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or @@ -629,11 +622,11 @@ private int FindEndPositionFallback - private bool FindEndPositionDeltasDFAOptimized( + private bool FindEndPositionDeltasDFAOptimized( ReadOnlySpan input, int lengthMinus1, RegexRunnerMode mode, long timeoutOccursAt, ref int posRef, ref int currentStateIdRef, ref int endPosRef) - where TAcceleratedStateHandler : struct, IAcceleratedStateHandler - where TOptimizedNullabilityHandler : struct, IOptimizedNullabilityHandler + where TInitialStateHandler : struct, IInitialStateHandler + where TOptimizedNullabilityHandler : struct, IDfaNoZAnchorOptimizedNullabilityHandler { // Initial check for input end lifted out of the subsequent hot-path loop. if (posRef == input.Length) @@ -651,7 +644,7 @@ private bool FindEndPositionDeltasDFAOptimized= lengthMinus1) { if (pos + 1 < input.Length) @@ -732,35 +729,24 @@ private bool FindEndPositionDeltasDFAOptimized - /// Workhorse inner loop for . Consumes the character by character, + /// Workhorse inner loop for . Consumes the character by character, /// starting at , for each character transitioning from one state in the DFA or NFA graph to the next state, /// lazily building out the graph as needed. /// - /// - /// The supplies the actual transitioning logic, controlling whether processing is - /// performed in DFA mode or in NFA mode. However, it expects to be configured to match, - /// so for example if is a , it expects the 's - /// to be non-negative and its to be null; vice versa for - /// . 
- /// /// - /// A positive value if iteration completed because it reached a deadend state or nullable state and the call is an isMatch. - /// 0 if iteration completed because we reached an initial state. - /// A negative value if iteration completed because we ran out of input or we failed to transition. + /// true if all input has been explored and there's no further work to be done; false if there's more input to explore and/or + /// we need to transition from DFA mode to NFA mode. /// - private bool FindEndPositionDeltasDFA(ReadOnlySpan input, int length, RegexRunnerMode mode, - long timeoutOccursAt, ref int posRef, ref CurrentState state, ref int endPosRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef) - where TStateHandler : struct, IStateHandler - where TInputReader : struct, IInputReader - where TFindOptimizationsHandler : struct, IInitialStateHandler + private bool FindEndPositionDeltasDFA(ReadOnlySpan input, int length, RegexRunnerMode mode, + ref int posRef, ref CurrentState stateRef, ref int endPosRef) + where TInitialStateHandler : struct, IInitialStateHandler where TNullabilityHandler : struct, INullabilityHandler { // To avoid frequent reads/writes to ref and out values, make and operate on local copies, which we then copy back once before returning. int pos = posRef; int endPos = endPosRef; - int initialStatePos = initialStatePosRef; - int initialStatePosCandidate = initialStatePosCandidateRef; + CurrentState state = stateRef; int deadStateId = _deadStateId; int initialStateId = _initialStateId; try @@ -768,32 +754,20 @@ private bool FindEndPositionDeltasDFA(this, input, ref state, ref pos)) - { - return true; - } - initialStatePosCandidate = pos; - } - - int positionId = TInputReader.GetPositionId(this, input, pos); + int positionId = DefaultInputReader.GetPositionId(this, input, pos); // If the state is nullable for the next character, meaning it accepts the empty string, // we found a potential end state. 
- if (TNullabilityHandler.IsNullableAt(this, in state, positionId, TStateHandler.GetStateFlags(this, in state))) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId)) { endPos = pos; - // endStateId = TStateHandler.ExtractNullableCoreStateId(this, in state, input, pos); - initialStatePos = initialStatePosCandidate; - // A match is known to exist. If that's all we need to know, we're done. if (mode == RegexRunnerMode.ExistenceRequired) { @@ -802,7 +776,7 @@ private bool FindEndPositionDeltasDFA= length || !TStateHandler.TryTakeTransition(this, ref state, positionId, timeoutOccursAt)) + if (pos >= length || !DfaStateHandler.TryTakeTransition(this, ref state.DfaStateId, positionId)) { return false; } @@ -816,62 +790,46 @@ private bool FindEndPositionDeltasDFA - /// Workhorse inner loop for . Consumes the character by character, + /// Workhorse inner loop for . Consumes the character by character, /// starting at , for each character transitioning from one state in the DFA or NFA graph to the next state, /// lazily building out the graph as needed. /// - /// - /// The supplies the actual transitioning logic, controlling whether processing is - /// performed in DFA mode or in NFA mode. However, it expects to be configured to match, - /// so for example if is a , it expects the 's - /// to be non-negative and its to be null; vice versa for - /// . - /// /// /// A positive value if iteration completed because it reached a deadend state or nullable state and the call is an isMatch. /// 0 if iteration completed because we reached an initial state. /// A negative value if iteration completed because we ran out of input or we failed to transition. 
/// - private bool FindEndPositionDeltasNFA( - ReadOnlySpan input, int length, RegexRunnerMode mode, long timeoutOccursAt, - ref int posRef, ref CurrentState state, ref int endPosRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef) - where TStateHandler : struct, IStateHandler - where TInputReader : struct, IInputReader - where TFindOptimizationsHandler : struct, IInitialStateHandler + private bool FindEndPositionDeltasNFA( + ReadOnlySpan input, int length, RegexRunnerMode mode, + ref int posRef, ref CurrentState state, ref int endPosRef) where TNullabilityHandler : struct, INullabilityHandler { // To avoid frequent reads/writes to ref and out values, make and operate on local copies, which we then copy back once before returning. int pos = posRef; int endPos = endPosRef; - int initialStatePos = initialStatePosRef; - int initialStatePosCandidate = initialStatePosCandidateRef; try { // Loop through each character in the input, transitioning from state to state for each. while (true) { - StateFlags flags = TStateHandler.GetStateFlags(this, in state); - // Dead end here means the set is empty if (state.NfaState!.NfaStateSet.Count == 0) { return true; } - int positionId = TInputReader.GetPositionId(this, input, pos); + int positionId = DefaultInputReader.GetPositionId(this, input, pos); // If the state is nullable for the next character, meaning it accepts the empty string, // we found a potential end state. - if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags)) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId)) { endPos = pos; - initialStatePos = initialStatePosCandidate; // A match is known to exist. If that's all we need to know, we're done. 
if (mode == RegexRunnerMode.ExistenceRequired) @@ -881,7 +839,7 @@ private bool FindEndPositionDeltasNFA= length || !TStateHandler.TryTakeTransition(this, ref state, positionId, timeoutOccursAt)) + if (pos >= length || !NfaStateHandler.TryTakeTransition(this, ref state, positionId)) { return false; } @@ -895,8 +853,6 @@ private bool FindEndPositionDeltasNFA(CurrentState st { // Run the DFA or NFA traversal backwards from the current point using the current state. bool done = currentState.NfaState is not null ? - FindStartPositionDeltasNFA(input, ref i, matchStartBoundary, ref currentState, ref lastStart) : - FindStartPositionDeltasDFA(input, ref i, matchStartBoundary, ref currentState, ref lastStart); + FindStartPositionDeltasNFA(input, ref i, matchStartBoundary, ref currentState, ref lastStart) : + FindStartPositionDeltasDFA(input, ref i, matchStartBoundary, ref currentState, ref lastStart); // If we found the starting position, we're done. if (done) @@ -957,14 +913,14 @@ private int FindStartPosition(CurrentState st /// starting at , for each character transitioning from one state in the DFA or NFA graph to the next state, /// lazily building out the graph as needed. /// - private bool FindStartPositionDeltasDFA( - ReadOnlySpan input, ref int i, int startThreshold, ref CurrentState state, ref int lastStart) - where TStateHandler : struct, IStateHandler + private bool FindStartPositionDeltasDFA( + ReadOnlySpan input, ref int i, int startThreshold, ref CurrentState stateRef, ref int lastStart) where TInputReader : struct, IInputReader where TNullabilityHandler : struct, INullabilityHandler { // To avoid frequent reads/writes to ref values, make and operate on local copies, which we then copy back once before returning. int pos = i; + CurrentState state = stateRef; try { // Loop backwards through each character in the input, transitioning from state to state for each. 
@@ -974,8 +930,8 @@ private bool FindStartPositionDeltasDFA 0 && - TNullabilityHandler.IsNullableAt(this, in state, positionId, TStateHandler.GetStateFlags(this, in state))) + if (_nullabilityArray[state.DfaStateId] != 0 && + TNullabilityHandler.IsNullableAt(this, in state, positionId)) { lastStart = pos; } @@ -989,7 +945,7 @@ private bool FindStartPositionDeltasDFA(ReadOnlySpan input, ref int i, int startThreshold, ref CurrentState state, ref int lastStart) - where TStateHandler : struct, IStateHandler + private bool FindStartPositionDeltasNFA(ReadOnlySpan input, ref int i, int startThreshold, ref CurrentState state, ref int lastStart) where TInputReader : struct, IInputReader where TNullabilityHandler : struct, INullabilityHandler { @@ -1022,7 +978,7 @@ private bool FindStartPositionDeltasNFA(this, in state, positionId, TStateHandler.GetStateFlags(this, in state))) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId)) { lastStart = pos; } @@ -1036,7 +992,7 @@ private bool FindStartPositionDeltasNFA(ReadOnlySpan input, int i, where TInputReader : struct, IInputReader { // Pick the correct start state based on previous character kind. - MatchingState initialState = _initialStates[GetCharKind(input, i - 1)]; + MatchingState initialState = _initialStates[GetCharKind(input, i - 1)]; Registers initialRegisters = perThreadData.InitialRegisters; @@ -1117,7 +1073,7 @@ private Registers FindSubcaptures(ReadOnlySpan input, int i, StateFlags flags = _stateFlagsArray[coreStateId]; Debug.Assert(coreStateId != _deadStateId); - if (flags.IsNullable() || (flags.CanBeNullable() && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) + if (flags.IsNullable() || (flags.CanBeNullable() && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) { // No lower priority transitions from this or other source states are taken because the // backtracking engines would return the match ending here. 
@@ -1140,11 +1096,11 @@ private Registers FindSubcaptures(ReadOnlySpan input, int i, foreach ((int endStateId, Registers endRegisters) in current.Values) { MatchingState endState = GetState(GetCoreStateId(endStateId)); - if (endState.IsNullableFor(GetCharKind(input, iEnd))) + if (endState.IsNullableFor(GetCharKind(input, iEnd))) { // Apply effects for finishing at the stored end state endState.Node.ApplyEffects((effect, args) => args.Registers.ApplyEffect(effect, args.Pos), - CharKind.Context(endState.PrevCharKind, GetCharKind(input, iEnd)), (Registers: endRegisters, Pos: iEnd)); + CharKind.Context(endState.PrevCharKind, GetCharKind(input, iEnd)), (Registers: endRegisters, Pos: iEnd)); return endRegisters; } } @@ -1153,15 +1109,12 @@ private Registers FindSubcaptures(ReadOnlySpan input, int i, return default; } - /// Look up the min term ID for the character at the specified position in the input. + /// Look up the min term ID for the character. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int GetMintermId(byte[] mintermLookup, ReadOnlySpan input, int pos) - { - Debug.Assert(pos >= 0 && pos < input.Length); - - char c = input[pos]; - return c < (uint)mintermLookup.Length ? mintermLookup[c] : 0; - } + private static int GetMintermId(byte[] mintermLookup, char c) => + c < (uint)mintermLookup.Length ? + mintermLookup[c] : + 0; /// Stores additional data for tracking capture start and end positions. /// The NFA simulation based third phase has one of these for each current state in the current set of live states. @@ -1313,11 +1266,7 @@ public CurrentState(NfaMatchingState nfaState) /// Represents a set of routines for operating over a . 
private interface IStateHandler { - public static abstract bool StartsWithLineAnchor(SymbolicRegexMatcher matcher, in CurrentState state); public static abstract bool IsNullableFor(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind); - public static abstract int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos); - public static abstract int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind); - public static abstract bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId, long timeoutOccursAt); public static abstract StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state); } @@ -1325,46 +1274,34 @@ private interface IStateHandler private readonly struct DfaStateHandler : IStateHandler { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool StartsWithLineAnchor(SymbolicRegexMatcher matcher, in CurrentState state) => matcher.GetState(state.DfaStateId).StartsWithLineAnchor; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableFor(SymbolicRegexMatcher matcher, in CurrentState state, - uint nextCharKind) => matcher._nullabilityArray[state.DfaStateId] > 0 && ((byte)(1 << (int)nextCharKind) & matcher._nullabilityArray[state.DfaStateId]) > 0; - - /// Gets the preferred DFA state for nullability. In DFA mode this is just the state itself. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos) => state.DfaStateId; - - /// Gets the length of any fixed-length marker that exists for this state, or -1 if there is none. 
- [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind) => matcher.GetState(state.DfaStateId).FixedLength(nextCharKind); + public static bool IsNullableFor(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind) => + matcher._nullabilityArray[state.DfaStateId] > 0 && + ((byte)(1 << (int)nextCharKind) & matcher._nullabilityArray[state.DfaStateId]) > 0; /// Take the transition to the next DFA state. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId, - long timeoutOccursAt) + public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref int dfaStateId, int mintermId) { - Debug.Assert(state.DfaStateId > 0, $"Expected non-zero {nameof(state.DfaStateId)}."); - Debug.Assert(state.NfaState is null, $"Expected null {nameof(state.NfaState)}."); + Debug.Assert(dfaStateId > 0, $"Expected non-zero {nameof(dfaStateId)}."); // Use the mintermId for the character being read to look up which state to transition to. // If that state has already been materialized, move to it, and we're done. If that state // hasn't been materialized, try to create it; if we can, move to it, and we're done. - int dfaOffset = matcher.DeltaOffset(state.DfaStateId, mintermId); + int dfaOffset = matcher.DeltaOffset(dfaStateId, mintermId); int nextStateId = matcher._dfaDelta[dfaOffset]; if (nextStateId > 0) { // There was an existing DFA transition to some state. Move to it and // return that we're still operating as a DFA and can keep going. - state.DfaStateId = nextStateId; + dfaStateId = nextStateId; return true; } - if (matcher.TryCreateNewTransition(matcher.GetState(state.DfaStateId), mintermId, dfaOffset, checkThreshold: true, out MatchingState? 
nextState)) + if (matcher.TryCreateNewTransition(matcher.GetState(dfaStateId), mintermId, dfaOffset, checkThreshold: true, timeoutOccursAt: 0, out MatchingState? nextState)) { // We were able to create a new DFA transition to some state. Move to it and // return that we're still operating as a DFA and can keep going. - state.DfaStateId = nextState.Id; + dfaStateId = nextState.Id; return true; } @@ -1373,8 +1310,7 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur /// Transition function that only considers DFA state id [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool TryTakeDFATransition(SymbolicRegexMatcher matcher, ref int state, - int mintermId, long timeoutOccursAt) + internal static bool TryTakeDFATransition(SymbolicRegexMatcher matcher, ref int state, int mintermId, long timeoutOccursAt) { Debug.Assert(state > 0, $"Expected {nameof(state)} {state} > 0"); @@ -1390,9 +1326,7 @@ internal static bool TryTakeDFATransition(SymbolicRegexMatcher matcher, re return true; } - if (matcher.TryCreateNewTransition(matcher.GetState(state), mintermId, - matcher.DeltaOffset(state, mintermId), - checkThreshold: true, out MatchingState? nextState, timeoutOccursAt)) + if (matcher.TryCreateNewTransition(matcher.GetState(state), mintermId, matcher.DeltaOffset(state, mintermId), checkThreshold: true, timeoutOccursAt, out MatchingState? nextState)) { // We were able to create a new DFA transition to some state. Move to it and // return that we're still operating as a DFA and can keep going. @@ -1446,42 +1380,8 @@ public static bool IsNullableFor(SymbolicRegexMatcher matcher, in CurrentS return false; } - /// Gets the preferred DFA state for nullability. In DFA mode this is just the state itself. 
- public static int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos) - { - uint nextCharKind = matcher.GetCharKind(input, pos); - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) - { - MatchingState coreState = matcher.GetState(matcher.GetCoreStateId(nfaState.Key)); - if (coreState.IsNullableFor(nextCharKind)) - { - return coreState.Id; - } - } - - Debug.Fail("ExtractNullableCoreStateId should only be called in nullable state/context."); - return -1; - } - - /// Gets the length of any fixed-length marker that exists for this state, or -1 if there is none. - public static int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind) - { - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) - { - MatchingState coreState = matcher.GetState(matcher.GetCoreStateId(nfaState.Key)); - if (coreState.IsNullableFor(nextCharKind)) - { - return coreState.FixedLength(nextCharKind); - } - } - - Debug.Fail("FixedLength should only be called in nullable state/context."); - return -1; - } - /// Take the transition to the next NFA state. - public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId, - long timeoutOccursAt = 0) + public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId) { Debug.Assert(state.DfaStateId < 0, $"Expected negative {nameof(state.DfaStateId)}."); Debug.Assert(state.NfaState is not null, $"Expected non-null {nameof(state.NfaState)}."); @@ -1557,7 +1457,7 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher< /// In NFA mode: /// - an empty set of states means that it is a dead end /// - no set of states qualifies as an initial state. 
This could be made more accurate, but with that the - /// matching logic would need to be updated to handle the fact that + /// matching logic would need to be updated to handle the fact that /// can transition back to a DFA state. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -1599,26 +1499,28 @@ public static void UndoTransition(ref CurrentState state) /// /// Interface for mapping positions in the input to position IDs, which capture all the information necessary to - /// both take transitions and decide nullability. For positions of valid characters that are handled normally, - /// these IDs coincide with minterm IDs (i.e. indices to ). Positions outside the bounds - /// of the input are mapped to -1. Optionally, an end-of-line as the very last character in the input may be - /// mapped to _minterms.Length for supporting the \Z anchor. + /// both take transitions and decide nullability. /// private interface IInputReader { + /// Gets the position ID for the specified character in the input. + /// + /// For positions of valid characters that are handled normally, these IDs coincide with minterm IDs (i.e. indices to ). + /// Positions outside the bounds of the input are mapped to -1. Optionally, an end-of-line as the very last character in the input may be + /// mapped to _minterms.Length for supporting the \Z anchor. The and parameters are specified + /// separately, rather than input[pos] being passed in as a single , because some inputs need to act differently + /// based on the position itself. + /// public static abstract int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan input, int pos); } - /// This reader omits the special handling of \n for the \Z anchor. - private readonly struct NoZAnchorInputReader : IInputReader - { - public static int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan input, int pos) => - (uint)pos < (uint)input.Length ? 
matcher._mintermClassifier.GetMintermID(input[pos]) : -1; - } - - /// This reader includes full handling of an \n as the last character of input for the \Z anchor. - private readonly struct FullInputReader : IInputReader + /// Provides an input reader that includes full handling of an \n as the last character of input for the \Z anchor. + private readonly struct DefaultInputReader : IInputReader { + /// + /// Gets the minterm ID of the specified character, -1 if the position isn't within the input, or .Length + /// for a \n at the very end of the input. + /// public static int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan input, int pos) { if ((uint)pos < (uint)input.Length) @@ -1634,181 +1536,199 @@ public static int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan } } - private interface IInitialStateHandler + /// Provides an optimized input reader that doesn't provide special-handling of \n at the end of the input for the \Z anchor. + private readonly struct NoZAnchorOptimizedInputReader : IInputReader { - public static abstract bool TryFindNextStartingPosition( - SymbolicRegexMatcher matcher, ReadOnlySpan input, - ref CurrentState state, ref int pos) - where TInputReader : struct, IInputReader; + /// Gets the minterm ID of the specified character, or -1 if the position isn't within the input. + public static int GetPositionId(SymbolicRegexMatcher matcher, ReadOnlySpan input, int pos) => + (uint)pos < (uint)input.Length ? + matcher._mintermClassifier.GetMintermID(input[pos]) : + -1; } - /// - /// Interface for accelerated states, returns true if position was changed - /// - private interface IAcceleratedStateHandler + /// Represents a handler used to determine the next possible matching position from an initial state. + private interface IInitialStateHandler { + /// Gets whether the handler performs any meaningful operation. If false, always returns true. + /// + /// This should be implemented to always return a constant true or false. 
The consumer will inline it and, if this is false, can dead-code eliminate + /// anything guarded by the condition. + /// + public static abstract bool IsOptimized { get; } + + /// Gets the next viable starting position. + /// true if a possible match location is found; false if no match is possible anywhere in the remaining input. + /// This may be used if is false but it will then always return true indicating that the current position may be viable. public static abstract bool TryFindNextStartingPosition( - SymbolicRegexMatcher matcher, ReadOnlySpan input, - byte[] lookup, ref int currentStateId, ref int pos, int initialStateId); + SymbolicRegexMatcher matcher, ReadOnlySpan input, ref int currentStateId, ref int pos, byte[]? lookup); + } + + /// Provides an initial state handler for when there are no initial state optimizations to apply. + private readonly struct NoOptimizationsInitialStateHandler : IInitialStateHandler + { + /// Returns false. + public static bool IsOptimized => false; + + /// Returns true. No optimizations are known to be able to skip states, thus every position is a viable starting position. + public static bool TryFindNextStartingPosition( + SymbolicRegexMatcher matcher, ReadOnlySpan input, ref int currentStateId, ref int pos, byte[]? lookup) => + true; } - private readonly struct NoAnchorAcceleratedStateHandler : IAcceleratedStateHandler + /// Provides a handler that uses the matcher's to optimize searching for the next viable starting state. + private readonly struct FindOptimizationsInitialStateHandler : IInitialStateHandler { + /// Returns true. + public static bool IsOptimized => true; + + /// Gets the next viable starting position. + /// true if a viable starting position was found; false if no further possible match exists. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool TryFindNextStartingPosition( - SymbolicRegexMatcher matcher, ReadOnlySpan input, byte[] lookup, ref int currentStateId, ref int pos, int initialStateId) + SymbolicRegexMatcher matcher, ReadOnlySpan input, ref int currentStateId, ref int pos, byte[]? lookup) { - if (currentStateId != initialStateId) - { - return false; - } + Debug.Assert(matcher._findOpts is not null); - if (!matcher._findOpts!.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) + // Find the first position that matches with some likely character. + if (matcher._findOpts.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) { - // No match exists - currentStateId = matcher._deadStateId; - pos = input.Length; + // Update the starting state based on where TryFindNextStartingPosition moved us to. + // As with the initial starting state, if it's a dead end, no match exists. + currentStateId = matcher._dotstarredInitialStates[matcher.GetCharKind(input, pos - 1)].Id; + return true; } - return true; + // No match exists + Debug.Assert(pos == input.Length); + currentStateId = matcher._deadStateId; + return false; } } - private readonly struct AcceleratedStateHandler : IAcceleratedStateHandler + /// Provides a handler that uses the matcher's to optimize searching for the next viable starting state. + /// This implementation works only when there are no \Z anchors in the pattern. + private readonly struct NoZAnchorFindOptimizationsInitialStateHandler : IInitialStateHandler { + /// Returns true. + public static bool IsOptimized => true; + + /// Gets the next viable starting position. + /// true if a viable starting position was found; false if no further possible match exists. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryFindNextStartingPosition(SymbolicRegexMatcher matcher, - ReadOnlySpan input, - byte[] lookup, ref int currentStateId, ref int pos, int initialStateId) + public static bool TryFindNextStartingPosition( + SymbolicRegexMatcher matcher, ReadOnlySpan input, ref int currentStateId, ref int pos, byte[]? lookup) { - if (currentStateId != initialStateId) - { - return false; - } + Debug.Assert(matcher._findOpts is not null); + Debug.Assert(lookup is not null, $"{nameof(NoZAnchorFindOptimizationsInitialStateHandler)} must only be used with call sites that pass non-null {nameof(lookup)}."); - if (matcher._findOpts!.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) - { - currentStateId = matcher._dotstarredInitialStates[matcher._positionKinds[GetMintermId(lookup, input, pos - 1) + 1]].Id; - } - else + if (matcher._findOpts.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) { - // No match exists - currentStateId = matcher._deadStateId; - pos = input.Length; + // Update the starting state based on where TryFindNextStartingPosition moved us to. + // This is an optimized version of the update in FindOptimizationsInitialStateHandler that doesn't need to consider the possibility of \Z anchors. + currentStateId = matcher._dotstarredInitialStates[matcher._positionKinds[GetMintermId(lookup, input[pos - 1]) + 1]].Id; + return true; } - return true; + // No match exists + Debug.Assert(pos == input.Length); + currentStateId = matcher._deadStateId; + return false; } } - private readonly struct NoAcceleratedStateHandler : IAcceleratedStateHandler + /// Provides a handler that uses the matcher's to optimize searching for the next viable starting state. + /// This implementation works only when there are no anchors in the pattern. 
+ private readonly struct NoAnchorsFindOptimizationsInitialStateHandler : IInitialStateHandler { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryFindNextStartingPosition( - SymbolicRegexMatcher matcher, ReadOnlySpan input, byte[] lookup, ref int currentStateId, ref int pos, int initialStateId) => - false; - } - - /// No-op handler for when there are no initial state optimizations to apply. - private readonly struct NoOptimizationsInitialStateHandler : IInitialStateHandler - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryFindNextStartingPosition(SymbolicRegexMatcher matcher, ReadOnlySpan input, ref CurrentState state, ref int pos) - where TInputReader : struct, IInputReader => - true; // the current position is a possible starting position - } + /// Returns true. + public static bool IsOptimized => true; - /// - /// Handler for when a instance is available. - /// - private readonly struct InitialStateFindOptimizationsHandler : IInitialStateHandler - { + /// Gets the next viable starting position. + /// true if a viable starting position was found; false if no further possible match exists. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryFindNextStartingPosition(SymbolicRegexMatcher matcher, ReadOnlySpan input, ref CurrentState state, ref int pos) - where TInputReader : struct, IInputReader + public static bool TryFindNextStartingPosition( + SymbolicRegexMatcher matcher, ReadOnlySpan input, ref int currentStateId, ref int pos, byte[]? lookup) { - // Find the first position that matches with some likely character. 
- if (matcher._findOpts!.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) + Debug.Assert(!matcher._containsAnyAnchor); + Debug.Assert(matcher._findOpts is not null); + Debug.Assert(currentStateId == matcher._initialStateId, "There are no anchors, so the current state should be the sole initial state."); + + if (matcher._findOpts.TryFindNextStartingPositionLeftToRight(input, ref pos, 0)) { - // Update the starting state based on where TryFindNextStartingPosition moved us to. - // As with the initial starting state, if it's a dead end, no match exists. - state = new CurrentState(matcher._dotstarredInitialStates[matcher.GetCharKind(input, pos - 1)]); + // There are no anchors, so there's only one starting state, so we don't need to update currentStateId that's already the starting state. return true; } // No match exists + Debug.Assert(pos == input.Length); + currentStateId = matcher._deadStateId; return false; } } - /// Interface for evaluating nullability of states. + /// Represents a handler for evaluating nullability of states. private interface INullabilityHandler { + /// Gets whether the specified position is nullable. public static abstract bool IsNullableAt( - SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) + SymbolicRegexMatcher matcher, in CurrentState state, int positionId) where TStateHandler : struct, IStateHandler; } - /// This nullability handler interface can be used in DFAs for patterns that do not contain \Z. - private interface IOptimizedNullabilityHandler + /// Nullability handler that will work for any pattern. + private readonly struct DefaultNullabilityHandler : INullabilityHandler { - public static abstract bool IsNullable( - SymbolicRegexMatcher matcher, byte[] nullabilityArray, int currStateId, - byte[] lookup, ReadOnlySpan input, int pos); + /// Gets whether the specified position is nullable. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId) + where TStateHandler : struct, IStateHandler + { + StateFlags flags = TStateHandler.GetStateFlags(matcher, in state); + return + flags.IsNullable() || + (flags.CanBeNullable() && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); + } } - /// - /// Specialized nullability handler for patterns without any anchors. - /// + /// Nullability handler for patterns without any anchors. private readonly struct NoAnchorsNullabilityHandler : INullabilityHandler { + /// Gets whether the specified position is nullable. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId) where TStateHandler : struct, IStateHandler { Debug.Assert(!matcher._pattern._info.ContainsSomeAnchor); - return flags.IsNullable(); + return TStateHandler.GetStateFlags(matcher, in state).IsNullable(); } } - /// - /// Nullability handler that will work for any pattern. - /// - private readonly struct FullNullabilityHandler : INullabilityHandler + /// Represents a handler for evaluating nullability of states and for use in DFAs for patterns that do not contain \Z anchors. + private interface IDfaNoZAnchorOptimizedNullabilityHandler { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) - where TStateHandler : struct, IStateHandler => - flags.IsNullable() || - (flags.CanBeNullable() && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); + /// Gets whether the specified position is nullable. 
+ public static abstract bool IsNullable(SymbolicRegexMatcher matcher, byte stateNullability, char c, byte[] lookup); } - private readonly struct NoAnchorOptimizedNullabilityHandler : IOptimizedNullabilityHandler + /// Optimized nullability handler that works regardless of what additional anchors may exist in a pattern. + private readonly struct DefaultDfaNoZAnchorOptimizedNullabilityHandler : IDfaNoZAnchorOptimizedNullabilityHandler { + /// Gets whether the specified position is nullable. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullable(SymbolicRegexMatcher matcher, byte[] nullabilityArray, int currStateId, byte[] lookup, ReadOnlySpan input, int pos) - { - Debug.Assert(pos >= 0 && pos < input.Length, "input end should not be handled here"); - Debug.Assert(currStateId < nullabilityArray.Length, "nullabilityArray grown but the reference is not up to date"); - return nullabilityArray[currStateId] > 0; - } + public static bool IsNullable(SymbolicRegexMatcher matcher, byte stateNullability, char c, byte[] lookup) => + stateNullability != 0 && + matcher.IsNullableWithContext(stateNullability, c < (uint)lookup.Length ? lookup[c] : 0); } - private readonly struct AnchorOptimizedNullabilityHandler : IOptimizedNullabilityHandler + /// Optimized nullability handler for when a pattern has no anchors at all. + private readonly struct NoAnchorDfaOptimizedNullabilityHandler : IDfaNoZAnchorOptimizedNullabilityHandler { + /// Gets whether the specified position is nullable. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullable(SymbolicRegexMatcher matcher, byte[] nullabilityArray, int currStateId, byte[] lookup, ReadOnlySpan input, int pos) + public static bool IsNullable(SymbolicRegexMatcher matcher, byte stateNullability, char c, byte[] lookup) { - Debug.Assert(pos >= 0 && pos < input.Length, "input end should not be handled here"); - Debug.Assert(currStateId < nullabilityArray.Length, "nullabilityArray grown but the reference is not up to date"); - - if (nullabilityArray[currStateId] > 0) - { - char c = input[pos]; - return matcher.IsNullableWithContext(currStateId, c < (uint)lookup.Length ? lookup[c] : 0); - } - - return false; + Debug.Assert(!matcher._pattern._info.ContainsSomeAnchor); + return stateNullability != 0; } } }