Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 69ed121

Browse files
committed
Ruby/Python: regex parser: group sequences of 'normal' characters
1 parent 36e02ae commit 69ed121

9 files changed

Lines changed: 166 additions & 231 deletions

File tree

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,12 @@ newtype TRegExpParent =
3939
/** A special character */
4040
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
4141
/** A normal character */
42-
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
42+
TRegExpNormalChar(Regex re, int start, int end) {
43+
re.normalCharacterSequence(start, end)
44+
or
45+
re.escapedCharacter(start, end) and
46+
not re.specialCharacter(start, end, _)
47+
} or
4348
/** A back reference */
4449
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
4550

python/ql/lib/semmle/python/regex.qll

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,45 @@ abstract class RegexString extends Expr {
446446
)
447447
}
448448

449+
/**
450+
* Holds if the range `start`..`end` is a sequence of 'normal' characters.
*
* Two cases are covered:
* - A normal character enclosed by a `charSet` range is always reported on
*   its own, as a one-character sequence.
* - Otherwise the sequence is taken from the longest run of consecutive
*   normal characters found by `normalCharacterSub`, provided that run is
*   not enclosed by a `charSet` range. When `qualifier(e, _, _, _)` holds at
*   the end `e` of the run, the final character is reported separately from
*   the characters before it; otherwise the whole run is one sequence.
451+
*/
452+
predicate normalCharacterSequence(int start, int end) {
453+
this.normalCharacter(start, end) and
454+
end = start + 1 and
455+
exists(int x, int y | this.charSet(x, y) and x <= start and y >= end) // enclosed by a character set
456+
or
457+
exists(int s, int e |
458+
e = max(int i | normalCharacterSub(s, i)) and // furthest end of the run that begins at `s`
459+
not exists(int x, int y | this.charSet(x, y) and x <= s and y >= e)
460+
|
461+
if qualifier(e, _, _, _) // NOTE(review): presumably a quantifier starts at `e` - confirm against `qualifier`
462+
then
463+
end = e and start = e - 1 // the last character of the run, on its own
464+
or
465+
end = e - 1 and start = s and start < end // the rest of the run; `start < end` excludes an empty range
466+
else (
467+
end = e and
468+
start = s
469+
)
470+
)
471+
}
472+
473+
/**
* Holds if every one-character range from `start` up to `end` is a normal
* character and no normal character immediately precedes `start`, i.e.
* `start`..`end` is a prefix of a run of consecutive normal characters.
*/
private predicate normalCharacterSub(int start, int end) {
474+
(
475+
normalCharacterSub(start, end - 1) // recursive case: extend a shorter prefix by one character
476+
or
477+
start = end - 1 and not normalCharacter(start - 1, start) // base case: `start` is where a run begins
478+
) and
479+
this.normalCharacter(end - 1, end) // the character ending at `end` must itself be normal
480+
}
481+
482+
/**
* Holds if the range `start`..`end` is a character-level item: a sequence of
* normal characters, an escaped character, or a special character.
*/
private predicate characterItem(int start, int end) {
483+
this.normalCharacterSequence(start, end) or
484+
this.escapedCharacter(start, end) or
485+
this.specialCharacter(start, end, _)
486+
}
487+
449488
/** Whether the text in the range start,end is a group */
450489
predicate group(int start, int end) {
451490
this.groupContents(start, end, _, _)
@@ -717,7 +756,7 @@ abstract class RegexString extends Expr {
717756
string getBackrefName(int start, int end) { this.named_backreference(start, end, result) }
718757

719758
private predicate baseItem(int start, int end) {
720-
this.character(start, end) and
759+
this.characterItem(start, end) and
721760
not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
722761
or
723762
this.group(start, end)
@@ -837,14 +876,14 @@ abstract class RegexString extends Expr {
837876
}
838877

839878
private predicate item_start(int start) {
840-
this.character(start, _) or
879+
this.characterItem(start, _) or
841880
this.isGroupStart(start) or
842881
this.charSet(start, _) or
843882
this.backreference(start, _)
844883
}
845884

846885
private predicate item_end(int end) {
847-
this.character(_, end)
886+
this.characterItem(_, end)
848887
or
849888
exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
850889
or
@@ -953,7 +992,7 @@ abstract class RegexString extends Expr {
953992
*/
954993
predicate firstItem(int start, int end) {
955994
(
956-
this.character(start, end)
995+
this.characterItem(start, end)
957996
or
958997
this.qualifiedItem(start, end, _, _)
959998
or
@@ -968,7 +1007,7 @@ abstract class RegexString extends Expr {
9681007
*/
9691008
predicate lastItem(int start, int end) {
9701009
(
971-
this.character(start, end)
1010+
this.characterItem(start, end)
9721011
or
9731012
this.qualifiedItem(start, end, _, _)
9741013
or

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
| 012345678 | first | 0 | 1 |
2-
| 012345678 | last | 8 | 9 |
3-
| (?!not-this)^[A-Z_]+$ | first | 3 | 4 |
1+
| 012345678 | first | 0 | 9 |
2+
| 012345678 | last | 0 | 9 |
3+
| (?!not-this)^[A-Z_]+$ | first | 3 | 11 |
44
| (?!not-this)^[A-Z_]+$ | first | 12 | 13 |
55
| (?!not-this)^[A-Z_]+$ | first | 13 | 19 |
66
| (?!not-this)^[A-Z_]+$ | first | 13 | 20 |
@@ -27,9 +27,9 @@
2727
| (?m)^(?!$) | last | 4 | 5 |
2828
| (?m)^(?!$) | last | 8 | 9 |
2929
| (\\033\|~{) | first | 1 | 5 |
30-
| (\\033\|~{) | first | 6 | 7 |
30+
| (\\033\|~{) | first | 6 | 8 |
3131
| (\\033\|~{) | last | 1 | 5 |
32-
| (\\033\|~{) | last | 7 | 8 |
32+
| (\\033\|~{) | last | 6 | 8 |
3333
| [\ufffd-\ufffd] | first | 0 | 5 |
3434
| [\ufffd-\ufffd] | last | 0 | 5 |
3535
| [\ufffd-\ufffd][\ufffd-\ufffd] | first | 0 | 5 |
@@ -52,8 +52,8 @@
5252
| \\A[+-]?\\d+ | last | 7 | 9 |
5353
| \\A[+-]?\\d+ | last | 7 | 10 |
5454
| \\Afoo\\Z | first | 0 | 2 |
55-
| \\Afoo\\Z | first | 2 | 3 |
56-
| \\Afoo\\Z | last | 4 | 5 |
55+
| \\Afoo\\Z | first | 2 | 5 |
56+
| \\Afoo\\Z | last | 2 | 5 |
5757
| \\Afoo\\Z | last | 5 | 7 |
5858
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | first | 0 | 2 |
5959
| \\[(?P<txt>[^[]*)\\]\\((?P<uri>[^)]*) | last | 28 | 32 |
@@ -86,30 +86,30 @@
8686
| ^[A-Z_]+$(?<!not-this) | last | 1 | 7 |
8787
| ^[A-Z_]+$(?<!not-this) | last | 1 | 8 |
8888
| ^[A-Z_]+$(?<!not-this) | last | 8 | 9 |
89-
| ^[A-Z_]+$(?<!not-this) | last | 20 | 21 |
89+
| ^[A-Z_]+$(?<!not-this) | last | 13 | 21 |
9090
| ax{01,3} | first | 0 | 1 |
9191
| ax{01,3} | last | 1 | 2 |
9292
| ax{01,3} | last | 1 | 8 |
93-
| ax{01,3} | last | 7 | 8 |
93+
| ax{01,3} | last | 3 | 8 |
9494
| ax{3,} | first | 0 | 1 |
9595
| ax{3,} | last | 1 | 2 |
9696
| ax{3,} | last | 1 | 6 |
97-
| ax{3,} | last | 5 | 6 |
97+
| ax{3,} | last | 3 | 6 |
9898
| ax{3} | first | 0 | 1 |
9999
| ax{3} | last | 1 | 2 |
100100
| ax{3} | last | 1 | 5 |
101-
| ax{3} | last | 4 | 5 |
101+
| ax{3} | last | 3 | 5 |
102102
| ax{,3} | first | 0 | 1 |
103103
| ax{,3} | last | 0 | 1 |
104104
| ax{,3} | last | 1 | 2 |
105105
| ax{,3} | last | 1 | 6 |
106-
| ax{,3} | last | 5 | 6 |
106+
| ax{,3} | last | 3 | 6 |
107107
| x\| | first | 0 | 1 |
108108
| x\| | last | 0 | 1 |
109109
| x\|(?<!\\w)l | first | 0 | 1 |
110110
| x\|(?<!\\w)l | first | 6 | 8 |
111111
| x\|(?<!\\w)l | first | 9 | 10 |
112112
| x\|(?<!\\w)l | last | 0 | 1 |
113113
| x\|(?<!\\w)l | last | 9 | 10 |
114-
| x{Not qual} | first | 0 | 1 |
115-
| x{Not qual} | last | 10 | 11 |
114+
| x{Not qual} | first | 0 | 11 |
115+
| x{Not qual} | last | 0 | 11 |

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
| redos.py:220:25:220:29 | [^X]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'W'. |
6060
| redos.py:223:30:223:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
6161
| redos.py:229:30:229:30 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'W' and containing many repetitions of 'bW'. |
62-
| redos.py:241:27:241:27 | b | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ba'. |
62+
| redos.py:241:26:241:27 | ab | This part of the regular expression may cause exponential backtracking on strings starting with 'a' and containing many repetitions of 'ab'. |
6363
| redos.py:247:25:247:31 | [\\n\\s]+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |
6464
| redos.py:256:25:256:27 | \\w* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |
6565
| redos.py:256:37:256:39 | \\w* | This part of the regular expression may cause exponential backtracking on strings starting with 'foobarbaz' and containing many repetitions of 'foobarbazfoobarbazfoobarbazfoobarbazfoobarbazfoobarbaz'. |

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,45 @@ class RegExp extends AST::RegExpLiteral {
401401
)
402402
}
403403

404+
/**
405+
* Holds if the range `start`..`end` is a sequence of 'normal' characters.
*
* Two cases are covered:
* - A normal character enclosed by a `charSet` range is always reported on
*   its own, as a one-character sequence.
* - Otherwise the sequence is taken from the longest run of consecutive
*   normal characters found by `normalCharacterSub`, provided that run is
*   not enclosed by a `charSet` range. When `qualifier(e, _, _, _)` holds at
*   the end `e` of the run, the final character is reported separately from
*   the characters before it; otherwise the whole run is one sequence.
406+
*/
407+
predicate normalCharacterSequence(int start, int end) {
408+
this.normalCharacter(start, end) and
409+
end = start + 1 and
410+
exists(int x, int y | this.charSet(x, y) and x <= start and y >= end) // enclosed by a character set
411+
or
412+
exists(int s, int e |
413+
e = max(int i | normalCharacterSub(s, i)) and // furthest end of the run that begins at `s`
414+
not exists(int x, int y | this.charSet(x, y) and x <= s and y >= e)
415+
|
416+
if qualifier(e, _, _, _) // NOTE(review): presumably a quantifier starts at `e` - confirm against `qualifier`
417+
then
418+
end = e and start = e - 1 // the last character of the run, on its own
419+
or
420+
end = e - 1 and start = s and start < end // the rest of the run; `start < end` excludes an empty range
421+
else (
422+
end = e and
423+
start = s
424+
)
425+
)
426+
}
427+
428+
/**
* Holds if every one-character range from `start` up to `end` is a normal
* character and no normal character immediately precedes `start`, i.e.
* `start`..`end` is a prefix of a run of consecutive normal characters.
*/
private predicate normalCharacterSub(int start, int end) {
429+
(
430+
normalCharacterSub(start, end - 1) // recursive case: extend a shorter prefix by one character
431+
or
432+
start = end - 1 and not normalCharacter(start - 1, start) // base case: `start` is where a run begins
433+
) and
434+
this.normalCharacter(end - 1, end) // the character ending at `end` must itself be normal
435+
}
436+
437+
/**
* Holds if the range `start`..`end` is a character-level item: a sequence of
* normal characters, an escaped character, or a special character.
*/
private predicate characterItem(int start, int end) {
438+
this.normalCharacterSequence(start, end) or
439+
this.escapedCharacter(start, end) or
440+
this.specialCharacter(start, end, _)
441+
}
442+
404443
/** Whether the text in the range `start,end` is a group */
405444
predicate group(int start, int end) {
406445
this.groupContents(start, end, _, _)
@@ -639,7 +678,7 @@ class RegExp extends AST::RegExpLiteral {
639678
string getBackRefName(int start, int end) { this.namedBackreference(start, end, result) }
640679

641680
private predicate baseItem(int start, int end) {
642-
this.character(start, end) and
681+
this.characterItem(start, end) and
643682
not exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
644683
or
645684
this.group(start, end)
@@ -746,15 +785,15 @@ class RegExp extends AST::RegExpLiteral {
746785
}
747786

748787
private predicate itemStart(int start) {
749-
this.character(start, _) or
788+
this.characterItem(start, _) or
750789
this.isGroupStart(start) or
751790
this.charSet(start, _) or
752791
this.backreference(start, _) or
753792
this.namedCharacterProperty(start, _, _)
754793
}
755794

756795
private predicate itemEnd(int end) {
757-
this.character(_, end)
796+
this.characterItem(_, end)
758797
or
759798
exists(int endm1 | this.isGroupEnd(endm1) and end = endm1 + 1)
760799
or
@@ -865,7 +904,7 @@ class RegExp extends AST::RegExpLiteral {
865904
*/
866905
predicate firstItem(int start, int end) {
867906
(
868-
this.character(start, end)
907+
this.characterItem(start, end)
869908
or
870909
this.qualifiedItem(start, end, _, _)
871910
or
@@ -880,7 +919,7 @@ class RegExp extends AST::RegExpLiteral {
880919
*/
881920
predicate lastItem(int start, int end) {
882921
(
883-
this.character(start, end)
922+
this.characterItem(start, end)
884923
or
885924
this.qualifiedItem(start, end, _, _)
886925
or

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,12 @@ newtype TRegExpParent =
228228
TRegExpCharacterRange(RegExp re, int start, int end) { re.charRange(_, start, _, _, end) } or
229229
TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
230230
TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
231-
TRegExpNormalChar(RegExp re, int start, int end) { re.normalCharacter(start, end) } or
231+
TRegExpNormalChar(RegExp re, int start, int end) {
232+
re.normalCharacterSequence(start, end)
233+
or
234+
re.escapedCharacter(start, end) and
235+
not re.specialCharacter(start, end, _)
236+
} or
232237
TRegExpBackRef(RegExp re, int start, int end) { re.backreference(start, end) } or
233238
TRegExpNamedCharacterProperty(RegExp re, int start, int end) {
234239
re.namedCharacterProperty(start, end, _)

0 commit comments

Comments
 (0)