@@ -105,22 +105,28 @@ class RegExpRoot extends RegExpTerm {
105105 */
106106 predicate isRelevant ( ) {
107107 // there is at least one repetition that might backtrack
108- exists ( InfiniteRepetitionQuantifier rep | getRoot ( rep ) = this |
109- // that could possibly match the same thing in multiple ways.
110- exists ( RegExpTerm child |
111- child instanceof RegExpAlt or
112- child instanceof RegExpQuantifier
113- |
114- child .getParent + ( ) = rep
115- )
116- ) and
108+ exists ( MaybeBacktrackingRepetition rep | getRoot ( rep ) = this ) and
117109 // there are no lookbehinds
118110 not exists ( RegExpLookbehind lbh | getRoot ( lbh ) = this ) and
119111 // is actually used as a RegExp
120112 isUsedAsRegExp ( )
121113 }
122114}
123115
116+ /**
117+ * An infinitely repeating quantifier that might backtrack.
118+ */
119+ class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
120+ MaybeBacktrackingRepetition ( ) {
121+ exists ( RegExpTerm child |
122+ child instanceof RegExpAlt or
123+ child instanceof RegExpQuantifier
124+ |
125+ child .getParent + ( ) = this
126+ )
127+ }
128+ }
129+
124130/**
125131 * A constant in a regular expression that represents valid Unicode character(s).
126132 */
@@ -461,7 +467,7 @@ newtype TState =
461467 * match `t`, or the `i`th character in `t` if `t` is a constant.
462468 */
463469class State extends TState {
464- RegExpParent repr ;
470+ RegExpTerm repr ;
465471
466472 State ( ) { this = Match ( repr , _) or this = Accept ( repr ) }
467473
@@ -473,6 +479,11 @@ class State extends TState {
473479 }
474480
475481 Location getLocation ( ) { result = repr .getLocation ( ) }
482+
483+ /**
484+ * Gets the term represented by this state.
485+ */
486+ RegExpTerm getRepr ( ) { result = repr }
476487}
477488
478489class EdgeLabel extends TInputSymbol {
@@ -586,6 +597,14 @@ State epsilonPred(State q) { q = epsilonSucc(result) }
586597 */
587598predicate deltaClosed ( State q1 , InputSymbol s , State q2 ) { delta ( epsilonSucc * ( q1 ) , s , q2 ) }
588599
600+ /**
601+ * Holds if state `s` might be inside a backtracking repetition.
602+ */
603+ pragma [ noinline]
604+ predicate stateInsideBacktracking ( State s ) {
605+ s .getRepr ( ) .getParent * ( ) instanceof MaybeBacktrackingRepetition
606+ }
607+
589608/**
590609 * A state in the product automaton.
591610 *
@@ -595,12 +614,16 @@ predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1
595614 * already constructed. To cut down on the number of states,
596615 * we only represent states `(q1, q2)` where `q1` is lexicographically
597616 * no bigger than `q2`.
617+ *
618+ * States are only constructed if both states in the pair are
619+ * inside a repetition that might backtrack.
598620 */
599621newtype TStatePair =
600622 MkStatePair ( State q1 , State q2 ) {
601623 isFork ( q1 , _, _, _, _) and q2 = q1
602624 or
603- step ( _, _, _, q1 , q2 ) and q1 .toString ( ) <= q2 .toString ( )
625+ step ( _, _, _, q1 , q2 ) and
626+ q1 .toString ( ) <= q2 .toString ( )
604627 }
605628
606629class StatePair extends TStatePair {
@@ -646,6 +669,7 @@ int statePairDist(StatePair q, StatePair r) =
646669 */
647670pragma [ noopt]
648671predicate isFork ( State q , InputSymbol s1 , InputSymbol s2 , State r1 , State r2 ) {
672+ stateInsideBacktracking ( q ) and
649673 exists ( State q1 , State q2 |
650674 q1 = epsilonSucc * ( q ) and
651675 delta ( q1 , s1 , r1 ) and
@@ -675,6 +699,9 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
675699/**
676700 * Holds if there are transitions from the components of `q` to `r1` and `r2`
677701 * labelled with `s1` and `s2`, respectively.
702+ *
703+ * We only consider transitions where the resulting states `(r1, r2)` are both
704+ * inside a repetition that might backtrack.
678705 */
679706pragma [ noopt]
680707predicate step ( StatePair q , InputSymbol s1 , InputSymbol s2 , State r1 , State r2 ) {
@@ -683,7 +710,9 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2)
683710 deltaClosed ( q2 , s2 , r2 ) and
684711 // use noopt to force the join on `intersect` to happen last.
685712 exists ( intersect ( s1 , s2 ) )
686- )
713+ ) and
714+ stateInsideBacktracking ( r1 ) and
715+ stateInsideBacktracking ( r2 )
687716}
688717
689718/**
0 commit comments