Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 94aa162

Browse files
committed
prune state-pairs that are outside a backtracking repetition
1 parent f3c3b82 commit 94aa162

1 file changed

Lines changed: 41 additions & 12 deletions

File tree

javascript/ql/src/Performance/ReDoS.ql

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,28 @@ class RegExpRoot extends RegExpTerm {
105105
*/
106106
predicate isRelevant() {
107107
// there is at least one repetition
108-
exists(InfiniteRepetitionQuantifier rep | getRoot(rep) = this |
109-
// that could possibly match the same thing in multiple ways.
110-
exists(RegExpTerm child |
111-
child instanceof RegExpAlt or
112-
child instanceof RegExpQuantifier
113-
|
114-
child.getParent+() = rep
115-
)
116-
) and
108+
exists(MaybeBacktrackingRepetition rep | getRoot(rep) = this) and
117109
// there are no lookbehinds
118110
not exists(RegExpLookbehind lbh | getRoot(lbh) = this) and
119111
// is actually used as a RegExp
120112
isUsedAsRegExp()
121113
}
122114
}
123115

116+
/**
117+
* An infinitely repeating quantifier that might backtrack.
118+
*/
119+
class MaybeBacktrackingRepetition extends InfiniteRepetitionQuantifier {
120+
MaybeBacktrackingRepetition() {
121+
exists(RegExpTerm child |
122+
child instanceof RegExpAlt or
123+
child instanceof RegExpQuantifier
124+
|
125+
child.getParent+() = this
126+
)
127+
}
128+
}
129+
124130
/**
125131
* A constant in a regular expression that represents valid Unicode character(s).
126132
*/
@@ -461,7 +467,7 @@ newtype TState =
461467
* match `t`, or the `i`th character in `t` if `t` is a constant.
462468
*/
463469
class State extends TState {
464-
RegExpParent repr;
470+
RegExpTerm repr;
465471

466472
State() { this = Match(repr, _) or this = Accept(repr) }
467473

@@ -473,6 +479,11 @@ class State extends TState {
473479
}
474480

475481
Location getLocation() { result = repr.getLocation() }
482+
483+
/**
484+
* Gets the term represented by this state.
485+
*/
486+
RegExpTerm getRepr() { result = repr }
476487
}
477488

478489
class EdgeLabel extends TInputSymbol {
@@ -586,6 +597,14 @@ State epsilonPred(State q) { q = epsilonSucc(result) }
586597
*/
587598
predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1), s, q2) }
588599

600+
/**
601+
* Holds if state `s` might be inside a backtracking repetition.
602+
*/
603+
pragma[noinline]
604+
predicate stateInsideBacktracking(State s) {
605+
s.getRepr().getParent*() instanceof MaybeBacktrackingRepetition
606+
}
607+
589608
/**
590609
* A state in the product automaton.
591610
*
@@ -595,12 +614,16 @@ predicate deltaClosed(State q1, InputSymbol s, State q2) { delta(epsilonSucc*(q1
595614
* already constructed. To cut down on the number of states,
596615
* we only represent states `(q1, q2)` where `q1` is lexicographically
597616
* no bigger than `q2`.
617+
*
618+
* States are only constructed if both states in the pair are
619+
* inside a repetition that might backtrack.
598620
*/
599621
newtype TStatePair =
600622
MkStatePair(State q1, State q2) {
601623
isFork(q1, _, _, _, _) and q2 = q1
602624
or
603-
step(_, _, _, q1, q2) and q1.toString() <= q2.toString()
625+
step(_, _, _, q1, q2) and
626+
q1.toString() <= q2.toString()
604627
}
605628

606629
class StatePair extends TStatePair {
@@ -646,6 +669,7 @@ int statePairDist(StatePair q, StatePair r) =
646669
*/
647670
pragma[noopt]
648671
predicate isFork(State q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
672+
stateInsideBacktracking(q) and
649673
exists(State q1, State q2 |
650674
q1 = epsilonSucc*(q) and
651675
delta(q1, s1, r1) and
@@ -675,6 +699,9 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, StatePair r) {
675699
/**
676700
* Holds if there are transitions from the components of `q` to `r1` and `r2`
677701
* labelled with `s1` and `s2`, respectively.
702+
*
703+
* We only consider transitions where the resulting states `(r1, r2)` are both
704+
* inside a repetition that might backtrack.
678705
*/
679706
pragma[noopt]
680707
predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2) {
@@ -683,7 +710,9 @@ predicate step(StatePair q, InputSymbol s1, InputSymbol s2, State r1, State r2)
683710
deltaClosed(q2, s2, r2) and
684711
// use noopt to force the join on `intersect` to happen last.
685712
exists(intersect(s1, s2))
686-
)
713+
) and
714+
stateInsideBacktracking(r1) and
715+
stateInsideBacktracking(r2)
687716
}
688717

689718
/**

0 commit comments

Comments
 (0)