Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4ede04f

Browse files
committed
improve performance by pruning based on shared root
1 parent 82252c0 commit 4ede04f

1 file changed

Lines changed: 38 additions & 9 deletions

File tree

javascript/ql/src/Performance/ReDoS.ql

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ RegExpRoot getRoot(RegExpTerm term) {
136136
*/
137137
newtype TInputSymbol =
138138
/** An input symbol corresponding to character `c`. */
139-
Char(string c) { c = any(RegExpConstant cc).getValue().charAt(_) } or
139+
Char(string c) { c = any(RegExpConstant cc | getRoot(cc).isRelevant()).getValue().charAt(_) } or
140140
/**
141141
* An input symbol representing all characters matched by
142142
* (positive, non-universal) character class `recc`.
@@ -162,6 +162,31 @@ newtype TInputSymbol =
162162
/** An epsilon transition in the automaton. */
163163
Epsilon()
164164

165+
/**
166+
* Holds if `a` and `b` are input symbols from the same regexp.
167+
* (Neither `a` nor `b` can be a `Dot()`, `Any()` or `Epsilon()`.)
168+
*/
169+
pragma[noinline]
170+
private predicate sharesRoot(TInputSymbol a, TInputSymbol b) {
171+
exists(RegExpRoot root |
172+
belongsTo(a, root) and
173+
belongsTo(b, root)
174+
)
175+
}
176+
177+
/**
178+
* Holds if `a` is an input symbol from a regexp that has root `root`.
179+
*/
180+
private predicate belongsTo(TInputSymbol a, RegExpRoot root) {
181+
exists(RegExpTerm term | getRoot(term) = root |
182+
a = Char(term.(RegExpConstant).getValue().charAt(_))
183+
or
184+
a = CharClass(term)
185+
or
186+
a = InvertedCharClass(term)
187+
)
188+
}
189+
165190
/**
166191
* An abstract input symbol, representing a set of concrete characters.
167192
*/
@@ -485,6 +510,7 @@ private string getARelevantCharClassChar(TInputSymbol symbol) {
485510
* negative char class `d`.
486511
*/
487512
private string getAOverlapBetweenCharacterClasses(CharClass c, InvertedCharClass d) {
513+
sharesRoot(c, d) and
488514
result = [getARelevantCharClassChar(c), getARelevantCharClassChar(d)] and
489515
exists(RegExpCharacterClass negClass, RegExpCharacterClass posClass |
490516
c = CharClass(posClass) and
@@ -500,12 +526,15 @@ private string getAOverlapBetweenCharacterClasses(CharClass c, InvertedCharClass
500526
string intersect(InputSymbol c, InputSymbol d) {
501527
c = Char(result) and
502528
(
503-
d = Char(result)
504-
or
505-
exists(RegExpCharacterClass cc | d = CharClass(cc) | charClassMatchesChar(cc, result))
506-
or
507-
exists(RegExpCharacterClass cc | d = InvertedCharClass(cc) |
508-
not charClassMatchesChar(cc, result)
529+
sharesRoot(c, d) and
530+
(
531+
d = Char(result)
532+
or
533+
exists(RegExpCharacterClass cc | d = CharClass(cc) | charClassMatchesChar(cc, result))
534+
or
535+
exists(RegExpCharacterClass cc | d = InvertedCharClass(cc) |
536+
not charClassMatchesChar(cc, result)
537+
)
509538
)
510539
or
511540
d = Dot() and
@@ -517,7 +546,7 @@ string intersect(InputSymbol c, InputSymbol d) {
517546
result = getMinOverlapBetweenCharacterClasses(c, d)
518547
or
519548
exists(RegExpCharacterClass cc | c = InvertedCharClass(cc) and result = chooseFromInverted(cc) |
520-
d = InvertedCharClass(cc)
549+
d = InvertedCharClass(cc) and sharesRoot(c, d)
521550
or
522551
d = Dot() and
523552
not (result = "\n" or result = "\r")
@@ -526,7 +555,7 @@ string intersect(InputSymbol c, InputSymbol d) {
526555
)
527556
or
528557
exists(RegExpCharacterClass cc | c = CharClass(cc) and result = choose(cc) |
529-
d = CharClass(cc)
558+
d = CharClass(cc) and sharesRoot(c, d)
530559
or
531560
d = Dot() and
532561
not (result = "\n" or result = "\r")

0 commit comments

Comments
 (0)