Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 57ba8a4

Browse files
Improve handling of hex escapes; and support some named character classes
1 parent 5143585 commit 57ba8a4

3 files changed

Lines changed: 96 additions & 23 deletions

File tree

java/ql/lib/semmle/code/java/regex/RegexTreeView.qll

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class RegExpParent extends TRegExpParent {
5555
string toString() { result = "RegExpParent" }
5656

5757
/** Gets the `i`th child term. */
58-
abstract RegExpTerm getChild(int i);
58+
RegExpTerm getChild(int i) { none() }
5959

6060
/** Gets a child term. */
6161
RegExpTerm getAChild() { result = this.getChild(_) }
@@ -143,26 +143,6 @@ class RegExpTerm extends RegExpParent {
143143
*/
144144
predicate isRootTerm() { start = 0 and end = re.getText().length() }
145145

146-
override RegExpTerm getChild(int i) {
147-
result = this.(RegExpAlt).getChild(i)
148-
or
149-
result = this.(RegExpBackRef).getChild(i)
150-
or
151-
result = this.(RegExpCharacterClass).getChild(i)
152-
or
153-
result = this.(RegExpCharacterRange).getChild(i)
154-
or
155-
result = this.(RegExpNormalChar).getChild(i)
156-
or
157-
result = this.(RegExpGroup).getChild(i)
158-
or
159-
result = this.(RegExpQuantifier).getChild(i)
160-
or
161-
result = this.(RegExpSequence).getChild(i)
162-
or
163-
result = this.(RegExpSpecialChar).getChild(i)
164-
}
165-
166146
/**
167147
* Gets the parent term of this regular expression term, or the
168148
* regular expression literal if this is the root term.
@@ -508,7 +488,7 @@ class RegExpEscape extends RegExpNormalChar {
508488
/**
509489
* Holds if this is a unicode escape (`\uhhhh`) or a hex escape (`\xhh` or `\x{h..h}`).
510490
*/
511-
private predicate isUnicode() { this.getText().matches("\\u%") }
491+
private predicate isUnicode() { this.getText().matches(["\\u%", "\\x%"]) }
512492

513493
/**
514494
* Gets the unicode char for this escape.
@@ -520,13 +500,24 @@ class RegExpEscape extends RegExpNormalChar {
520500
)
521501
}
522502

503+
/** Gets the hexadecimal digits of this escape, i.e. its text without the leading `\u`, `\x`, or the `\x{` ... `}` wrapper. */
504+
private string getHexString() {
505+
this.isUnicode() and
506+
if this.getText().matches("\\u%") // \uhhhh
507+
then result = this.getText().suffix(2) // drop the two leading characters (`\u`)
508+
else
509+
if this.getText().matches("\\x{%") // \x{h..h}
510+
then result = this.getText().substring(3, this.getText().length() - 1) // drop the three leading characters (`\x{`) and the final character
511+
else result = this.getText().suffix(2) // \xhh
512+
}
513+
523514
/**
524515
* Gets int value for the `index`th char in the hex number of the unicode escape.
525516
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
526517
*/
527518
private int getHexValueFromUnicode(int index) {
528519
this.isUnicode() and
529-
exists(string hex, string char | hex = this.getText().suffix(2) |
520+
exists(string hex, string char | hex = this.getHexString() |
530521
char = hex.charAt(index) and
531522
result = 16.pow(hex.length() - index - 1) * toHex(char)
532523
)
@@ -574,6 +565,50 @@ class RegExpCharacterClassEscape extends RegExpEscape {
574565
override string getPrimaryQLClass() { result = "RegExpCharacterClassEscape" }
575566
}
576567

568+
/**
569+
* A named character class in a regular expression, written with a
* leading `p`, or with a leading `P` for the inverted form.
570+
*
571+
* Examples:
572+
*
573+
* ```
574+
* \p{Digit}
575+
* \p{IsLowerCase}
* ```
576+
*/
577+
class RegExpNamedProperty extends RegExpCharacterClassEscape {
578+
boolean inverted;
579+
string name;
580+
581+
RegExpNamedProperty() {
582+
name = this.getValue().substring(2, this.getValue().length() - 1) and // value minus its first two characters and its last one; presumably the `p{`/`P{` prefix and the closing `}`
583+
(
584+
inverted = false and
585+
this.getValue().charAt(0) = "p"
586+
or
587+
inverted = true and
588+
this.getValue().charAt(0) = "P" // upper-case `P` marks the inverted form
589+
)
590+
}
591+
592+
/** Holds if this class is inverted, i.e. its value starts with an upper-case `P`. */
593+
predicate isInverted() { inverted = true }
594+
595+
/** Gets the name of this class, e.g. `Digit` for `\p{Digit}`. */
596+
string getClassName() { result = name }
597+
598+
/**
599+
* Gets an equivalent single-character escape sequence for this class (e.g. \d) if possible, excluding the escape character.
* The result is upper-cased when this class is inverted. Only the names listed in the body are mapped; any other name has no result.
600+
*/
601+
string getBackslashEquivalent() {
602+
exists(string eq | if inverted = true then result = eq.toUpperCase() else result = eq |
603+
name = ["Digit", "IsDigit"] and
604+
eq = "d"
605+
or
606+
name = ["Space", "IsWhite_Space"] and
607+
eq = "s"
608+
)
609+
}
610+
}
611+
577612
/**
578613
* A character class in a regular expression.
579614
*

java/ql/lib/semmle/code/java/security/performance/RegExpTreeView.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import semmle.code.java.regex.RegexTreeView
1212
*/
1313
predicate isEscapeClass(RegExpTerm term, string clazz) {
1414
term.(RegExpCharacterClassEscape).getValue() = clazz // the escape's own value is the class name
15+
or
16+
term.(RegExpNamedProperty).getBackslashEquivalent() = clazz // a named property with a single-character equivalent (e.g. `Digit` as "d") counts as that class
1517
}
1618

1719
/**

java/ql/test/query-tests/security/CWE-730/ExpRedosTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,42 @@ class ExpRedosTest {
371371
// GOOD
372372
"X(\\u0061|b)+Y",
373373

374+
// NOT GOOD
375+
"X(\\x61|a)*Y", // $ hasExpRedos
376+
377+
// GOOD
378+
"X(\\x61|b)+Y",
379+
380+
// NOT GOOD
381+
"X(\\x{061}|a)*Y", // $ hasExpRedos
382+
383+
// GOOD
384+
"X(\\x{061}|b)+Y",
385+
386+
// NOT GOOD
387+
"X(\\p{Digit}|7)*Y", // $ hasExpRedos
388+
389+
// GOOD
390+
"X(\\p{Digit}|b)+Y",
391+
392+
// NOT GOOD
393+
"X(\\P{Digit}|b)*Y", // $ hasExpRedos
394+
395+
// GOOD
396+
"X(\\P{Digit}|7)+Y",
397+
398+
// NOT GOOD
399+
"X(\\p{IsDigit}|7)*Y", // $ hasExpRedos
400+
401+
// GOOD
402+
"X(\\p{IsDigit}|b)+Y",
403+
404+
// NOT GOOD - but not detected
405+
"X(\\p{Alpha}|a)*Y", // $ MISSING: hasExpRedos
406+
407+
// GOOD
408+
"X(\\p{Alpha}|7)+Y",
409+
374410
// GOOD
375411
"(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)",
376412

0 commit comments

Comments
 (0)