Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 321cf09

Browse files
committed
add redos support for the simplest possible inverted char class
1 parent d04f3df commit 321cf09

3 files changed

Lines changed: 63 additions & 1 deletion

File tree

javascript/ql/src/Performance/ReDoS.ql

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,15 @@ newtype TInputSymbol =
146146
not recc.isInverted() and
147147
not recc.isUniversalClass()
148148
} or
149+
/**
150+
* An input symbol representing all characters matched by
151+
* the inverted (non-universal) character class `recc`.
152+
*/
153+
InvertedCharClass(RegExpCharacterClass recc) {
154+
getRoot(recc).isRelevant() and
155+
recc.isInverted() and
156+
not recc.isUniversalClass()
157+
} or
149158
/** An input symbol representing all characters matched by `.`. */
150159
Dot() or
151160
/** An input symbol representing all characters. */
@@ -164,6 +173,8 @@ class InputSymbol extends TInputSymbol {
164173
or
165174
result = any(RegExpCharacterClass recc | this = CharClass(recc)).toString()
166175
or
176+
result = any(RegExpCharacterClass recc | this = InvertedCharClass(recc)).toString()
177+
or
167178
this = Dot() and result = "."
168179
or
169180
this = Any() and result = "[^]"
@@ -266,7 +277,9 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
266277
exists(RegExpCharacterClass cc |
267278
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
268279
or
269-
q1 = before(cc) and lbl = CharClass(cc) and q2 = after(cc)
280+
q1 = before(cc) and
281+
(lbl = CharClass(cc) or lbl = InvertedCharClass(cc)) and
282+
q2 = after(cc)
270283
)
271284
or
272285
exists(RegExpAlt alt | lbl = Epsilon() | q1 = before(alt) and q2 = before(alt.getAChild()))
@@ -440,6 +453,16 @@ string intersect(InputSymbol c, InputSymbol d) {
440453
d = Any()
441454
)
442455
or
456+
exists(RegExpCharacterClass cc | c = InvertedCharClass(cc) and result = chooseFromInverted(cc) |
457+
// TODO: Not done here - later commits will add more
458+
//d = InvertedCharClass(cc)
459+
//or
460+
//d = Dot() and
461+
//not (result = "\n" or result = "\r")
462+
//or
463+
d = Any()
464+
)
465+
or
443466
exists(RegExpCharacterClass cc | c = CharClass(cc) and result = choose(cc) |
444467
d = CharClass(cc)
445468
or
@@ -465,6 +488,7 @@ string intersect(InputSymbol c, InputSymbol d) {
465488
* Gets a character matched by character class `cc`.
466489
*/
467490
string choose(RegExpCharacterClass cc) {
491+
exists(CharClass(cc)) and
468492
result =
469493
min(string c |
470494
exists(RegExpTerm child | child = cc.getAChild() |
@@ -474,6 +498,39 @@ string choose(RegExpCharacterClass cc) {
474498
)
475499
}
476500

501+
/**
502+
* Gets the char after `c` (from a simplified ASCII table).
503+
*/
504+
string nextChar(string c) { exists(int code | code = ascii(c) | code + 1 = ascii(result)) }
505+
506+
/**
507+
* Gets an approximation for the ASCII code for `char`.
508+
* Only the easily printable chars are included (so no newline, tab, null, etc).
509+
*/
510+
int ascii(string char) {
511+
char =
512+
rank[result](string c |
513+
c =
514+
"! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
515+
.charAt(_)
516+
)
517+
}
518+
519+
/**
520+
* Chooses a char matched by the inverted char class `cc`.
521+
*/
522+
string chooseFromInverted(RegExpCharacterClass cc) {
523+
exists(InvertedCharClass(cc)) and
524+
// The next char after the max of the inverted charclass.
525+
result =
526+
nextChar(max(string c |
527+
exists(RegExpTerm child | child = cc.getAChild() |
528+
c = child.(RegExpConstant).getValue() or
529+
child.(RegExpCharacterRange).isRange(_, c)
530+
)
531+
))
532+
}
533+
477534
/**
478535
* Gets a string corresponding to the trace `t`.
479536
*/

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@
5252
| tst.js:77:14:77:21 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
5353
| tst.js:83:14:83:20 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
5454
| tst.js:89:25:89:32 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
55+
| tst.js:95:15:95:25 | ([^]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,7 @@ var bad17 = new RegExp('(a|aa?)*b');
9090

9191
// GOOD - not used as regexp
9292
var good9 = '(a|aa?)*b';
93+
94+
// NOT GOOD
95+
var bad18 = /(([^]|[^a])*)"/;
96+

0 commit comments

Comments
 (0)