Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f9f7a01

Browse files
Add Java ReDoS libraries to identical-files.json
1 parent 11e465f commit f9f7a01

3 files changed

Lines changed: 120 additions & 55 deletions

File tree

config/identical-files.json

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -475,20 +475,23 @@
475475
"python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll",
476476
"ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll"
477477
],
478-
"ReDoS Util Python/JS/Ruby": [
478+
"ReDoS Util Python/JS/Ruby/Java": [
479479
"javascript/ql/lib/semmle/javascript/security/performance/ReDoSUtil.qll",
480480
"python/ql/lib/semmle/python/security/performance/ReDoSUtil.qll",
481-
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll"
481+
"ruby/ql/lib/codeql/ruby/security/performance/ReDoSUtil.qll",
482+
"java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll"
482483
],
483-
"ReDoS Exponential Python/JS/Ruby": [
484+
"ReDoS Exponential Python/JS/Ruby/Java": [
484485
"javascript/ql/lib/semmle/javascript/security/performance/ExponentialBackTracking.qll",
485486
"python/ql/lib/semmle/python/security/performance/ExponentialBackTracking.qll",
486-
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll"
487+
"ruby/ql/lib/codeql/ruby/security/performance/ExponentialBackTracking.qll",
488+
"java/ql/lib/semmle/code/java/security/performance/ExponentialBackTracking.qll"
487489
],
488-
"ReDoS Polynomial Python/JS/Ruby": [
490+
"ReDoS Polynomial Python/JS/Ruby/Java": [
489491
"javascript/ql/lib/semmle/javascript/security/performance/SuperlinearBackTracking.qll",
490492
"python/ql/lib/semmle/python/security/performance/SuperlinearBackTracking.qll",
491-
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll"
493+
"ruby/ql/lib/codeql/ruby/security/performance/SuperlinearBackTracking.qll",
494+
"java/ql/lib/semmle/code/java/security/performance/SuperlinearBackTracking.qll"
492495
],
493496
"BadTagFilterQuery Python/JS/Ruby": [
494497
"javascript/ql/lib/semmle/javascript/security/BadTagFilterQuery.qll",

java/ql/lib/semmle/code/java/security/performance/ReDoSUtil.qll

Lines changed: 103 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -140,9 +140,9 @@ class RegExpRoot extends RegExpTerm {
140140
// there is at least one repetition
141141
getRoot(any(InfiniteRepetitionQuantifier q)) = this and
142142
// is actually used as a RegExp
143-
isUsedAsRegExp() and
143+
this.isUsedAsRegExp() and
144144
// not excluded for library specific reasons
145-
not isExcluded(getRootTerm().getParent())
145+
not isExcluded(this.getRootTerm().getParent())
146146
}
147147
}
148148

@@ -218,7 +218,7 @@ private newtype TInputSymbol =
218218
recc instanceof RegExpCharacterClass and
219219
not recc.(RegExpCharacterClass).isUniversalClass()
220220
or
221-
recc instanceof RegExpCharacterClassEscape
221+
isEscapeClass(recc, _)
222222
)
223223
} or
224224
/** An input symbol representing all characters matched by `.`. */
@@ -302,7 +302,7 @@ abstract class CharacterClass extends InputSymbol {
302302
/**
303303
* Gets a character matched by this character class.
304304
*/
305-
string choose() { result = getARelevantChar() and matches(result) }
305+
string choose() { result = this.getARelevantChar() and this.matches(result) }
306306
}
307307

308308
/**
@@ -340,13 +340,13 @@ private module CharacterClasses {
340340
char <= hi
341341
)
342342
or
343-
exists(RegExpCharacterClassEscape escape | escape = child |
344-
escape.getValue() = escape.getValue().toLowerCase() and
345-
classEscapeMatches(escape.getValue(), char)
343+
exists(string charClass | isEscapeClass(child, charClass) |
344+
charClass.toLowerCase() = charClass and
345+
classEscapeMatches(charClass, char)
346346
or
347347
char = getARelevantChar() and
348-
escape.getValue() = escape.getValue().toUpperCase() and
349-
not classEscapeMatches(escape.getValue().toLowerCase(), char)
348+
charClass.toUpperCase() = charClass and
349+
not classEscapeMatches(charClass, char)
350350
)
351351
)
352352
}
@@ -409,10 +409,10 @@ private module CharacterClasses {
409409
or
410410
child.(RegExpCharacterRange).isRange(_, result)
411411
or
412-
exists(RegExpCharacterClassEscape escape | child = escape |
413-
result = min(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
412+
exists(string charClass | isEscapeClass(child, charClass) |
413+
result = min(string s | classEscapeMatches(charClass.toLowerCase(), s))
414414
or
415-
result = max(string s | classEscapeMatches(escape.getValue().toLowerCase(), s))
415+
result = max(string s | classEscapeMatches(charClass.toLowerCase(), s))
416416
)
417417
)
418418
}
@@ -466,33 +466,36 @@ private module CharacterClasses {
466466
* An implementation of `CharacterClass` for \d, \s, and \w.
467467
*/
468468
private class PositiveCharacterClassEscape extends CharacterClass {
469-
RegExpCharacterClassEscape cc;
469+
RegExpTerm cc;
470+
string charClass;
470471

471472
PositiveCharacterClassEscape() {
472-
this = getCanonicalCharClass(cc) and cc.getValue() = ["d", "s", "w"]
473+
isEscapeClass(cc, charClass) and
474+
this = getCanonicalCharClass(cc) and
475+
charClass = ["d", "s", "w"]
473476
}
474477

475478
override string getARelevantChar() {
476-
cc.getValue() = "d" and
479+
charClass = "d" and
477480
result = ["0", "9"]
478481
or
479-
cc.getValue() = "s" and
482+
charClass = "s" and
480483
result = " "
481484
or
482-
cc.getValue() = "w" and
485+
charClass = "w" and
483486
result = ["a", "Z", "_", "0", "9"]
484487
}
485488

486-
override predicate matches(string char) { classEscapeMatches(cc.getValue(), char) }
489+
override predicate matches(string char) { classEscapeMatches(charClass, char) }
487490

488491
override string choose() {
489-
cc.getValue() = "d" and
492+
charClass = "d" and
490493
result = "9"
491494
or
492-
cc.getValue() = "s" and
495+
charClass = "s" and
493496
result = " "
494497
or
495-
cc.getValue() = "w" and
498+
charClass = "w" and
496499
result = "a"
497500
}
498501
}
@@ -501,26 +504,29 @@ private module CharacterClasses {
501504
* An implementation of `CharacterClass` for \D, \S, and \W.
502505
*/
503506
private class NegativeCharacterClassEscape extends CharacterClass {
504-
RegExpCharacterClassEscape cc;
507+
RegExpTerm cc;
508+
string charClass;
505509

506510
NegativeCharacterClassEscape() {
507-
this = getCanonicalCharClass(cc) and cc.getValue() = ["D", "S", "W"]
511+
isEscapeClass(cc, charClass) and
512+
this = getCanonicalCharClass(cc) and
513+
charClass = ["D", "S", "W"]
508514
}
509515

510516
override string getARelevantChar() {
511-
cc.getValue() = "D" and
517+
charClass = "D" and
512518
result = ["a", "Z", "!"]
513519
or
514-
cc.getValue() = "S" and
520+
charClass = "S" and
515521
result = ["a", "9", "!"]
516522
or
517-
cc.getValue() = "W" and
523+
charClass = "W" and
518524
result = [" ", "!"]
519525
}
520526

521527
bindingset[char]
522528
override predicate matches(string char) {
523-
not classEscapeMatches(cc.getValue().toLowerCase(), char)
529+
not classEscapeMatches(charClass.toLowerCase(), char)
524530
}
525531
}
526532
}
@@ -533,6 +539,55 @@ private class EdgeLabel extends TInputSymbol {
533539
}
534540
}
535541

542+
/**
543+
* A RegExp term that acts like a plus.
544+
* Either it's a RegExpPlus, or it is a range {1,X} where X is >= 30 or where no upper bound is given.
545+
* 30 has been chosen as a threshold because for exponential blowup 2^30 is enough to get a decent DOS attack.
546+
*/
547+
private class EffectivelyPlus extends RegExpTerm {
548+
EffectivelyPlus() {
549+
this instanceof RegExpPlus
550+
or
551+
exists(RegExpRange range |
552+
range.getLowerBound() = 1 and
553+
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
554+
|
555+
this = range
556+
)
557+
}
558+
}
559+
560+
/**
561+
* A RegExp term that acts like a star.
562+
* Either it's a RegExpStar, or it is a range {0,X} where X is >= 30 or where no upper bound is given.
563+
*/
564+
private class EffectivelyStar extends RegExpTerm {
565+
EffectivelyStar() {
566+
this instanceof RegExpStar
567+
or
568+
exists(RegExpRange range |
569+
range.getLowerBound() = 0 and
570+
(range.getUpperBound() >= 30 or not exists(range.getUpperBound()))
571+
|
572+
this = range
573+
)
574+
}
575+
}
576+
577+
/**
578+
* A RegExp term that acts like a question mark.
579+
* Either it's a RegExpQuestion, or it is a range {0,1}.
580+
*/
581+
private class EffectivelyQuestion extends RegExpTerm {
582+
EffectivelyQuestion() {
583+
this instanceof RegExpOpt
584+
or
585+
exists(RegExpRange range | range.getLowerBound() = 0 and range.getUpperBound() = 1 |
586+
this = range
587+
)
588+
}
589+
}
590+
536591
/**
537592
* Gets the state before matching `t`.
538593
*/
@@ -542,7 +597,7 @@ private State before(RegExpTerm t) { result = Match(t, 0) }
542597
/**
543598
* Gets a state the NFA may be in after matching `t`.
544599
*/
545-
private State after(RegExpTerm t) {
600+
State after(RegExpTerm t) {
546601
exists(RegExpAlt alt | t = alt.getAChild() | result = after(alt))
547602
or
548603
exists(RegExpSequence seq, int i | t = seq.getChild(i) |
@@ -553,14 +608,14 @@ private State after(RegExpTerm t) {
553608
or
554609
exists(RegExpGroup grp | t = grp.getAChild() | result = after(grp))
555610
or
556-
exists(RegExpStar star | t = star.getAChild() | result = before(star))
611+
exists(EffectivelyStar star | t = star.getAChild() | result = before(star))
557612
or
558-
exists(RegExpPlus plus | t = plus.getAChild() |
613+
exists(EffectivelyPlus plus | t = plus.getAChild() |
559614
result = before(plus) or
560615
result = after(plus)
561616
)
562617
or
563-
exists(RegExpOpt opt | t = opt.getAChild() | result = after(opt))
618+
exists(EffectivelyQuestion opt | t = opt.getAChild() | result = after(opt))
564619
or
565620
exists(RegExpRoot root | t = root | result = AcceptAnySuffix(root))
566621
}
@@ -599,7 +654,7 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
599654
q2 = after(cc)
600655
)
601656
or
602-
exists(RegExpCharacterClassEscape cc |
657+
exists(RegExpTerm cc | isEscapeClass(cc, _) |
603658
q1 = before(cc) and
604659
lbl = CharClass(cc.getRawValue() + "|" + getCanonicalizationFlags(cc.getRootTerm())) and
605660
q2 = after(cc)
@@ -611,15 +666,17 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
611666
or
612667
exists(RegExpGroup grp | lbl = Epsilon() | q1 = before(grp) and q2 = before(grp.getChild(0)))
613668
or
614-
exists(RegExpStar star | lbl = Epsilon() |
669+
exists(EffectivelyStar star | lbl = Epsilon() |
615670
q1 = before(star) and q2 = before(star.getChild(0))
616671
or
617672
q1 = before(star) and q2 = after(star)
618673
)
619674
or
620-
exists(RegExpPlus plus | lbl = Epsilon() | q1 = before(plus) and q2 = before(plus.getChild(0)))
675+
exists(EffectivelyPlus plus | lbl = Epsilon() |
676+
q1 = before(plus) and q2 = before(plus.getChild(0))
677+
)
621678
or
622-
exists(RegExpOpt opt | lbl = Epsilon() |
679+
exists(EffectivelyQuestion opt | lbl = Epsilon() |
623680
q1 = before(opt) and q2 = before(opt.getChild(0))
624681
or
625682
q1 = before(opt) and q2 = after(opt)
@@ -671,7 +728,7 @@ RegExpRoot getRoot(RegExpTerm term) {
671728
/**
672729
* A state in the NFA.
673730
*/
674-
private newtype TState =
731+
newtype TState =
675732
/**
676733
* A state representing that the NFA is about to match a term.
677734
* `i` is used to index into multi-char literals.
@@ -801,29 +858,26 @@ InputSymbol getAnInputSymbolMatching(string char) {
801858
result = Any()
802859
}
803860

861+
/**
862+
* Holds if `state` is a start state.
863+
*/
864+
predicate isStartState(State state) {
865+
state = mkMatch(any(RegExpRoot r))
866+
or
867+
exists(RegExpCaret car | state = after(car))
868+
}
869+
804870
/**
805871
* Predicates for constructing a prefix string that leads to a given state.
806872
*/
807873
private module PrefixConstruction {
808-
/**
809-
* Holds if `state` starts the string matched by the regular expression.
810-
*/
811-
private predicate isStartState(State state) {
812-
state instanceof StateInPumpableRegexp and
813-
(
814-
state = Match(any(RegExpRoot r), _)
815-
or
816-
exists(RegExpCaret car | state = after(car))
817-
)
818-
}
819-
820874
/**
821875
* Holds if `state` is the textually last start state for the regular expression.
822876
*/
823877
private predicate lastStartState(State state) {
824878
exists(RegExpRoot root |
825879
state =
826-
max(State s, Location l |
880+
max(StateInPumpableRegexp s, Location l |
827881
isStartState(s) and getRoot(s.getRepr()) = root and l = s.getRepr().getLocation()
828882
|
829883
s

java/ql/lib/semmle/code/java/security/performance/RegExpTreeView.qll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
55
import java
66
import semmle.code.java.regex.RegexTreeView
77

8+
/**
9+
* Holds if `term` is an escape class representing e.g. `\d`.
10+
* `clazz` is which character class it represents, e.g. "d" for `\d`.
11+
*/
12+
predicate isEscapeClass(RegExpTerm term, string clazz) {
13+
exists(RegExpCharacterClassEscape escape | term = escape | escape.getValue() = clazz)
14+
}
15+
816
/**
917
* Holds if the regular expression should not be considered.
1018
*

0 commit comments

Comments
 (0)