Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ac514b1

Browse files
committed
remove false positives where the analysis would wrongly conclude that the accept state could not be reached
1 parent 5f199e8 commit ac514b1

3 files changed

Lines changed: 123 additions & 41 deletions

File tree

javascript/ql/src/Performance/ReDoS.ql

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -711,16 +711,11 @@ private string getAOverlapBetweenCharacterClasses(CharacterClass c, CharacterCla
711711
*/
712712
string intersect(InputSymbol c, InputSymbol d) {
713713
c = Char(result) and
714+
d = getAnInputSymbolMatching(result) and
714715
(
715-
sharesRoot(c, d) and
716-
(
717-
d = Char(result)
718-
or
719-
d.(CharacterClass).matches(result)
720-
)
716+
sharesRoot(c, d)
721717
or
722-
d = Dot() and
723-
not (result = "\n" or result = "\r")
718+
d = Dot()
724719
or
725720
d = Any()
726721
)
@@ -749,6 +744,21 @@ string intersect(InputSymbol c, InputSymbol d) {
749744
result = intersect(d, c)
750745
}
751746

747+
/**
748+
* Gets a symbol that matches `char`.
749+
*/
750+
bindingset[char]
751+
InputSymbol getAnInputSymbolMatching(string char) {
752+
result = Char(char)
753+
or
754+
result.(CharacterClass).matches(char)
755+
or
756+
result = Dot() and
757+
not (char = "\n" or char = "\r")
758+
or
759+
result = Any()
760+
}
761+
752762
/**
753763
* Gets the char after `c` (from a simplified ASCII table).
754764
*/
@@ -825,14 +835,8 @@ predicate isPumpable(State fork, string w) {
825835
}
826836

827837
/**
828-
* Gets a state that can be reached from pumpable `fork` consuming
829-
* the first `i+1` characters of `w`.
830-
*
831-
* Character classes are overapproximated as intervals; for example,
832-
* `[a-ln-z]` is treated the same as `[a-z]`, and hence considered
833-
* to match `m`, even though in fact it does not. This is fine for
834-
* our purposes, since we only use this predicate to avoid false
835-
* positives.
838+
* Gets a state that can be reached from pumpable `fork` by consuming the whole
839+
* of `w` zero or more times, followed by the first `i+1` characters of `w`.
836840
*/
837841
State process(State fork, string w, int i) {
838842
isPumpable(fork, w) and
@@ -841,11 +845,12 @@ State process(State fork, string w, int i) {
841845
i = 0 and prev = fork
842846
or
843847
prev = process(fork, w, i - 1)
848+
or
849+
// repeat until fixpoint
850+
i = 0 and
851+
prev = process(fork, w, w.length() - 1)
844852
|
845-
exists(InputSymbol s |
846-
deltaClosed(prev, s, result) and
847-
exists(intersect(Char(w.charAt(i)), s))
848-
)
853+
deltaClosed(prev, getAnInputSymbolMatching(w.charAt(i)), result)
849854
)
850855
}
851856

@@ -873,7 +878,7 @@ from RegExpTerm t, string c, int i
873878
where
874879
c = min(string w | isPumpable(Match(t, i), w)) and
875880
not isPumpable(epsilonSucc+(Match(t, i)), _) and
876-
not epsilonSucc*(process(Match(t, i), c, c.length() - 1)) = Accept(_)
881+
not epsilonSucc*(process(Match(t, i), c, [0 .. c.length() - 1])) = Accept(_)
877882
select t,
878883
"This part of the regular expression may cause exponential backtracking on strings " +
879884
"containing many repetitions of '" + escape(rotate(c, i)) + "'."

javascript/ql/test/query-tests/Performance/ReDoS/ReDoS.expected

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,10 @@
77
| regexplib/address.js:75:220:75:222 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
88
| regexplib/address.js:75:616:75:618 | \\w+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
99
| regexplib/address.js:75:803:75:811 | [A-Za-z]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'. |
10-
| regexplib/dates.js:66:133:66:139 | JANUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JANUARY'. |
11-
| regexplib/dates.js:66:141:66:148 | FEBRUARY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'FEBRUARY'. |
12-
| regexplib/dates.js:66:150:66:154 | MARCH | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MARCH'. |
13-
| regexplib/dates.js:66:156:66:160 | APRIL | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'APRIL'. |
14-
| regexplib/dates.js:66:162:66:164 | MAY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'MAY'. |
15-
| regexplib/dates.js:66:166:66:169 | JUNE | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JUNE'. |
16-
| regexplib/dates.js:66:171:66:174 | JULY | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'JULY'. |
17-
| regexplib/dates.js:66:176:66:181 | AUGUST | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'AUGUST'. |
18-
| regexplib/dates.js:66:183:66:191 | SEPTEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'SEPTEMBER'. |
19-
| regexplib/dates.js:66:193:66:199 | OCTOBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'OCTOBER'. |
20-
| regexplib/dates.js:66:201:66:208 | NOVEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'NOVEMBER'. |
21-
| regexplib/dates.js:66:210:66:217 | DECEMBER | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'DECEMBER'. |
22-
| regexplib/dates.js:66:234:66:240 | PRESENT | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'PRESENT'. |
2310
| regexplib/email.js:1:16:1:22 | [-.\\w]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
2411
| regexplib/email.js:5:24:5:35 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
2512
| regexplib/email.js:5:63:5:74 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
2613
| regexplib/email.js:6:10:6:35 | (?:[a-zA-Z0-9][\\.\\-\\+_]?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
27-
| regexplib/email.js:12:71:12:80 | ([-.]\\w+)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '.0.0,0@0'. |
2814
| regexplib/email.js:25:67:25:78 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
2915
| regexplib/email.js:25:106:25:117 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
3016
| regexplib/email.js:25:212:25:223 | [a-zA-Z0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -50,7 +36,6 @@
5036
| regexplib/misc.js:123:17:123:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
5137
| regexplib/misc.js:142:3:142:25 | (\\/w\|\\/W\|[^<>+?$%{}&])+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/W'. |
5238
| regexplib/misc.js:148:20:148:22 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
53-
| regexplib/misc.js:148:23:148:29 | [^"'=]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '> '. |
5439
| regexplib/strings.js:19:31:19:57 | [a-z&#230;&#248;&#229;0-9]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '#'. |
5540
| regexplib/strings.js:57:17:57:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
5641
| regexplib/strings.js:81:17:81:19 | \\d+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -65,7 +50,6 @@
6550
| regexplib/uri.js:63:393:63:429 | [a-zA-Z0-9\\.\\,\\?\\'\\\\/\\+&%\\$#\\=~_\\-@]* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/#'. |
6651
| tst.js:4:18:4:32 | (?:__\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '__'. |
6752
| tst.js:4:42:4:58 | (?:\\*\\*\|[\\s\\S])+? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '**'. |
68-
| tst.js:14:14:14:15 | .* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ','. |
6953
| tst.js:19:24:19:43 | (?:[^"\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
7054
| tst.js:19:47:19:66 | (?:[^'\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
7155
| tst.js:19:71:19:90 | (?:[^)\\\\]\|\\\\\\\\\|\\\\.)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\\\\\\\'. |
@@ -86,7 +70,6 @@
8670
| tst.js:83:14:83:20 | (.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
8771
| tst.js:89:25:89:32 | (a\|aa?)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
8872
| tst.js:95:15:95:25 | ([^]\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
89-
| tst.js:98:15:98:20 | [^"']+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '('. |
9073
| tst.js:101:15:101:23 | (.\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
9174
| tst.js:107:15:107:23 | (b\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
9275
| tst.js:110:15:110:23 | (G\|[^a])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
@@ -109,3 +92,19 @@
10992
| tst.js:167:15:167:27 | (1s\|[\\da-z])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '1s'. |
11093
| tst.js:170:15:170:23 | (0\|[\\d])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
11194
| tst.js:173:16:173:20 | [\\d]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
95+
| tst.js:188:17:188:21 | [^>]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '?'. |
96+
| tst.js:191:16:191:21 | [^>a]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
97+
| tst.js:194:17:194:19 | \\s* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
98+
| tst.js:197:18:197:20 | \\s+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
99+
| tst.js:200:68:200:79 | [ a-zA-Z{}]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' A:'. |
100+
| tst.js:200:81:200:82 | ,? | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ',A: '. |
101+
| tst.js:203:15:203:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
102+
| tst.js:203:18:203:19 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
103+
| tst.js:206:17:206:18 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
104+
| tst.js:209:15:209:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
105+
| tst.js:215:15:215:16 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
106+
| tst.js:221:15:221:17 | \\n+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
107+
| tst.js:224:15:224:19 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'Y'. |
108+
| tst.js:227:20:227:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bY'. |
109+
| tst.js:233:20:233:20 | b | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'bY'. |
110+
| tst.js:248:16:248:17 | ab | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'ab'. |

javascript/ql/test/query-tests/Performance/ReDoS/tst.js

Lines changed: 81 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ var bad1 = /^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)/;
88
// under the MIT license; see file marked-LICENSE.
99
var good1 = /^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)/;
1010

11-
// NOT GOOD
11+
// GOOD - there is no suffix (witness at the end) that could cause the regexp to fail to match
1212
// Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
1313
// which is licensed under the MIT license; see file brace-expansion-LICENSE.
1414
var bad2 = /(.*,)+.+/;
@@ -94,7 +94,7 @@ var good9 = '(a|aa?)*b';
9494
// NOT GOOD
9595
var bad18 = /(([^]|[^a])*)"/;
9696

97-
// NOT GOOD
97+
// GOOD - there is no suffix (witness at the end) that could cause the regexp to fail to match
9898
var bad19 = /([^"']+)*/g;
9999

100100
// NOT GOOD
@@ -170,4 +170,82 @@ var bad39 = /((1s|[\da-z])*)"/;
170170
var bad40 = /((0|[\d])*)"/;
171171

172172
// NOT GOOD
173-
var bad41 = /(([\d]+)*)"/;
173+
var bad41 = /(([\d]+)*)"/;
174+
175+
// GOOD - there is no suffix (witness at the end) that could cause the regexp to fail to match
176+
var good12 = /(\d+(X\d+)?)+/;
177+
178+
// GOOD - there is no suffix (witness at the end) that could cause the regexp to fail to match
179+
var good13 = /([0-9]+(X[0-9]*)?)*/;
180+
181+
// NOT GOOD
182+
var bad42 = /([\n\s]+)*(.)/;
183+
184+
// GOOD - any witness passes through the accept state.
185+
var good14 = /(A*A*X)*/;
186+
187+
// GOOD - but still flagged (always matches something)
188+
var good15 = /^([^>]+)*(>|$)/;
189+
190+
// NOT GOOD
191+
var bad43 = /^([^>a]+)*(>|$)/;
192+
193+
// NOT GOOD
194+
var bad44 = /(\n\s*)+$/;
195+
196+
// NOT GOOD
197+
var bad45 = /^(?:\s+|#.*|\(\?#[^)]*\))*(?:[?*+]|{\d+(?:,\d*)?})/;
198+
199+
// NOT GOOD
200+
var bad46 = /\{\[\s*([a-zA-Z]+)\(([a-zA-Z]+)\)((\s*([a-zA-Z]+)\: ?([ a-zA-Z{}]+),?)+)*\s*\]\}/;
201+
202+
// NOT GOOD
203+
var bad47 = /(a+|b+|c+)*c/;
204+
205+
// NOT GOOD
206+
var bad48 = /(((a+a?)*)+b+)/;
207+
208+
// NOT GOOD
209+
var bad49 = /(a+)+bbbb/;
210+
211+
// GOOD
212+
var good16 = /(a+)+aaaaa*a+/;
213+
214+
// NOT GOOD
215+
var bad50 = /(a+)+aaaaa$/;
216+
217+
// GOOD
218+
var good17 = /(\n+)+\n\n/;
219+
220+
// NOT GOOD
221+
var bad51 = /(\n+)+\n\n$/;
222+
223+
// NOT GOOD
224+
var bad52 = /([^X]+)*$/;
225+
226+
// NOT GOOD
227+
var bad53 = /(([^X]b)+)*$/;
228+
229+
// GOOD
230+
var good18 = /(([^X]b)+)*($|[^X]b)/;
231+
232+
// NOT GOOD
233+
var bad54 = /(([^X]b)+)*($|[^X]c)/;
234+
235+
// GOOD
236+
var good19 = /(.*,)+.+/;
237+
238+
// GOOD
239+
var good20 = /((ab)+)*ababab/;
240+
241+
// GOOD
242+
var good21 = /((ab)+)*abab(ab)*(ab)+/;
243+
244+
// GOOD
245+
var good22 = /((ab)+)*/;
246+
247+
// NOT GOOD
248+
var bad55 = /((ab)+)*$/;
249+
250+
// GOOD
251+
var good23 = /((ab)+)*[a1][b1][a2][b2][a3][b3]/;

0 commit comments

Comments
 (0)