Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d04c99b

Browse files
Support quote sequences
1 parent 59945cd commit d04c99b

2 files changed

Lines changed: 94 additions & 6 deletions

File tree

java/ql/lib/semmle/code/java/regex/RegexTreeView.qll

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ newtype TRegExpParent =
4040
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
4141
/** A normal character */
4242
TRegExpNormalChar(Regex re, int start, int end) { re.normalCharacter(start, end) } or
43+
/** A quoted sequence */
44+
TRegExpQuote(Regex re, int start, int end) { re.quote(start, end) } or
4345
/** A back reference */
4446
TRegExpBackRef(Regex re, int start, int end) { re.backreference(start, end) }
4547

@@ -107,6 +109,8 @@ class RegExpTerm extends RegExpParent {
107109
or
108110
this = TRegExpNormalChar(re, start, end)
109111
or
112+
this = TRegExpQuote(re, start, end)
113+
or
110114
this = TRegExpGroup(re, start, end)
111115
or
112116
this = TRegExpQuantifier(re, start, end)
@@ -675,9 +679,34 @@ class RegExpNormalChar extends RegExpTerm, TRegExpNormalChar {
675679
override string getPrimaryQLClass() { result = "RegExpNormalChar" }
676680
}
677681

682+
/**
683+
* A quoted sequence.
684+
*
685+
* Example:
686+
* ```
687+
* \Qabc\E
688+
* ```
689+
*/
690+
class RegExpQuote extends RegExpTerm, TRegExpQuote {
691+
string value;
692+
693+
RegExpQuote() {
694+
exists(int inner_start, int inner_end |
695+
this = TRegExpQuote(re, start, end) and
696+
re.quote(start, end, inner_start, inner_end) and
697+
value = re.getText().substring(inner_start, inner_end)
698+
)
699+
}
700+
701+
/** Gets the string matched by this quote term. */
702+
string getValue() { result = value }
703+
704+
override string getPrimaryQLClass() { result = "RegExpQuote" }
705+
}
706+
678707
/**
679708
* A constant regular expression term, that is, a regular expression
680-
* term matching a single string. Currently, this will always be a single character.
709+
* term matching a single string. This can be a single character or a quoted sequence.
681710
*
682711
* Example:
683712
*
@@ -689,14 +718,14 @@ class RegExpConstant extends RegExpTerm {
689718
string value;
690719

691720
RegExpConstant() {
692-
this = TRegExpNormalChar(re, start, end) and
721+
(this = TRegExpNormalChar(re, start, end) or this = TRegExpQuote(re, start, end)) and
693722
not this instanceof RegExpCharacterClassEscape and
694723
// exclude chars in qualifiers
695724
// TODO: push this into regex library
696725
not exists(int qstart, int qend | re.qualifiedPart(_, qstart, qend, _, _) |
697726
qstart <= start and end <= qend
698727
) and
699-
value = this.(RegExpNormalChar).getValue()
728+
(value = this.(RegExpNormalChar).getValue() or value = this.(RegExpQuote).getValue())
700729
}
701730

702731
/**

java/ql/lib/semmle/code/java/regex/regex.qll

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,17 @@ abstract class RegexString extends Expr {
189189
}
190190

191191
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
192-
predicate escapingChar(int pos) { this.escaping(pos) = true }
192+
predicate escapingChar(int pos) {
193+
this.escaping(pos) = true and
194+
not exists(int x, int y | this.quote(x, y) and pos in [x .. y - 1])
195+
}
193196

194197
/**
195198
* Helper predicate for `escapingChar`.
196199
* In order to avoid negative recursion, we return a boolean.
197200
* This way, we can refer to `escaping(pos - 1).booleanNot()`
198201
* rather than to a negated version of `escaping(pos)`.
202+
* Does not take into account escape characters inside quote sequences.
199203
*/
200204
private boolean escaping(int pos) {
201205
pos = -1 and result = false
@@ -205,14 +209,62 @@ abstract class RegexString extends Expr {
205209
this.getChar(pos) != "\\" and result = false
206210
}
207211

212+
/**
213+
* Helper predicate for `quote`.
214+
* Holds if the char at `pos` could be the beginning of a quote delimiter, i.e. `\Q` (non-escaped) or `\E` (escaping not checked, as quote sequences turn off escapes).
215+
* Result is `true` for `\Q` and `false` for `\E`.
216+
*/
217+
private boolean quote_delimiter(int pos) {
218+
result = true and
219+
this.escaping(pos) = true and
220+
this.getChar(pos + 1) = "Q"
221+
or
222+
result = false and
223+
this.getChar(pos) = "\\" and
224+
this.getChar(pos + 1) = "E"
225+
}
226+
227+
/**
228+
* Helper predicate for `quote`.
229+
* Holds if the char at `pos` is the one-based `index`th occurrence of a quote delimiter (`\Q` or `\E`).
230+
* Result is `true` for `\Q` and `false` for `\E`.
231+
*/
232+
private boolean quote_delimiter(int index, int pos) {
233+
result = this.quote_delimiter(pos) and
234+
pos = rank[index](int p | this.quote_delimiter(p) = [true, false])
235+
}
236+
237+
/** Holds if a quoted sequence is found between `start` and `end` */
238+
predicate quote(int start, int end) { this.quote(start, end, _, _) }
239+
240+
/** Holds if a quoted sequence is found between `start` and `end`, with content found between `inner_start` and `inner_end`. */
241+
predicate quote(int start, int end, int inner_start, int inner_end) {
242+
exists(int index |
243+
this.quote_delimiter(index, start) = true and
244+
(
245+
index = 1
246+
or
247+
this.quote_delimiter(index - 1, _) = false
248+
) and
249+
inner_start = start + 2 and
250+
inner_end = end - 2 and
251+
inner_end > inner_start and
252+
this.quote_delimiter(inner_end) = false and
253+
not exists(int mid |
254+
this.quote_delimiter(mid) = false and mid in [inner_start .. inner_end - 1]
255+
)
256+
)
257+
}
258+
208259
/** Gets the text of this regex */
209260
string getText() { result = this.(StringLiteral).getValue() }
210261

211262
string getChar(int i) { result = this.getText().charAt(i) }
212263

213264
string nonEscapedCharAt(int i) {
214265
result = this.getText().charAt(i) and
215-
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1])
266+
not exists(int x, int y | this.escapedCharacter(x, y) and i in [x .. y - 1]) and
267+
not exists(int x, int y | this.quote(x, y) and i in [x .. y - 1])
216268
}
217269

218270
private predicate isOptionDivider(int i) { this.nonEscapedCharAt(i) = "|" }
@@ -728,7 +780,8 @@ abstract class RegexString extends Expr {
728780
this.character(start, _) or
729781
this.isGroupStart(start) or
730782
this.charSet(start, _) or
731-
this.backreference(start, _)
783+
this.backreference(start, _) or
784+
this.quote(start, _)
732785
}
733786

734787
private predicate item_end(int end) {
@@ -739,6 +792,8 @@ abstract class RegexString extends Expr {
739792
this.charSet(_, end)
740793
or
741794
this.qualifier(_, end, _, _)
795+
or
796+
this.quote(_, end)
742797
}
743798

744799
private predicate top_level(int start, int end) {
@@ -846,6 +901,8 @@ abstract class RegexString extends Expr {
846901
this.qualifiedItem(start, end, _, _)
847902
or
848903
this.charSet(start, end)
904+
or
905+
this.quote(start, end)
849906
) and
850907
this.firstPart(start, end)
851908
}
@@ -861,6 +918,8 @@ abstract class RegexString extends Expr {
861918
this.qualifiedItem(start, end, _, _)
862919
or
863920
this.charSet(start, end)
921+
or
922+
this.quote(start, end)
864923
) and
865924
this.lastPart(start, end)
866925
}

0 commit comments

Comments
 (0)