@@ -5,18 +5,18 @@ private import semmle.code.java.regex.regex
55
66/**
77 * An element containing a regular expression term, that is, either
8- * a string literal (parsed as a regular expression)
9- * or another regular expression term.
8+ * a string literal (parsed as a regular expression; the root of the parse tree )
9+ * or another regular expression term (a descendant of the root) .
1010 *
11- * For sequences and alternations, we require at least one child .
11+ * For sequences and alternations, we require at least two children .
1212 * Otherwise, we wish to represent the term differently.
1313 * This avoids multiple representations of the same term.
1414 */
15- newtype TRegExpParent =
15+ private newtype TRegExpParent =
1616 /** A string literal used as a regular expression */
1717 TRegExpLiteral ( Regex re ) or
1818 /** A quantified term */
19- TRegExpQuantifier ( Regex re , int start , int end ) { re .qualifiedItem ( start , end , _, _) } or
19+ TRegExpQuantifier ( Regex re , int start , int end ) { re .quantifiedItem ( start , end , _, _) } or
2020 /** A sequence term */
2121 TRegExpSequence ( Regex re , int start , int end ) {
2222 re .sequence ( start , end ) and
@@ -47,8 +47,8 @@ newtype TRegExpParent =
4747
4848/**
4949 * An element containing a regular expression term, that is, either
50- * a string literal (parsed as a regular expression)
51- * or another regular expression term.
50+ * a string literal (parsed as a regular expression; the root of the parse tree )
51+ * or another regular expression term (a descendant of the root) .
5252 */
5353class RegExpParent extends TRegExpParent {
5454 /** Gets a textual representation of this element. */
@@ -92,6 +92,7 @@ class RegExpLiteral extends TRegExpLiteral, RegExpParent {
9292
9393/**
9494 * A regular expression term, that is, a syntactic part of a regular expression.
95+ * These are the tree nodes that form the parse tree of a regular expression literal.
9596 */
9697class RegExpTerm extends RegExpParent {
9798 Regex re ;
@@ -187,6 +188,8 @@ class RegExpTerm extends RegExpParent {
187188 predicate hasLocationInfo (
188189 string filepath , int startline , int startcolumn , int endline , int endcolumn
189190 ) {
191+ // This currently gives incorrect results for string literals including backslashes. TODO: fix that.
192+ // There are also more complex cases where it fails. Handling all of them would be difficult for not much gain.
190193 exists ( int re_start , int re_end |
191194 re .getLocation ( ) .hasLocationInfo ( filepath , startline , re_start , endline , re_end ) and
192195 startcolumn = re_start + start + 1 and
@@ -245,7 +248,7 @@ class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
245248
246249 RegExpQuantifier ( ) {
247250 this = TRegExpQuantifier ( re , start , end ) and
248- re .qualifiedPart ( start , part_end , end , maybe_empty , may_repeat_forever )
251+ re .quantifiedPart ( start , part_end , end , maybe_empty , may_repeat_forever )
249252 }
250253
251254 override RegExpTerm getChild ( int i ) {
@@ -255,11 +258,11 @@ class RegExpQuantifier extends RegExpTerm, TRegExpQuantifier {
255258 result .getEnd ( ) = part_end
256259 }
257260
258- /** Hols if this term may match an unlimited number of times. */
261+ /** Holds if this term may match an unlimited number of times. */
259262 predicate mayRepeatForever ( ) { may_repeat_forever = true }
260263
261- /** Gets the qualifier for this term. That is e.g "?" for "a?". */
262- string getQualifier ( ) { result = re .getText ( ) .substring ( part_end , end ) }
264+ /** Gets the quantifier for this term. For example, "?" for "a?". */
265+ string getquantifier ( ) { result = re .getText ( ) .substring ( part_end , end ) }
263266
264267 override string getPrimaryQLClass ( ) { result = "RegExpQuantifier" }
265268}
@@ -281,7 +284,7 @@ class InfiniteRepetitionQuantifier extends RegExpQuantifier {
281284 * ```
282285 */
283286class RegExpStar extends InfiniteRepetitionQuantifier {
284- RegExpStar ( ) { this .getQualifier ( ) .charAt ( 0 ) = "*" }
287+ RegExpStar ( ) { this .getquantifier ( ) .charAt ( 0 ) = "*" }
285288
286289 override string getPrimaryQLClass ( ) { result = "RegExpStar" }
287290}
@@ -296,7 +299,7 @@ class RegExpStar extends InfiniteRepetitionQuantifier {
296299 * ```
297300 */
298301class RegExpPlus extends InfiniteRepetitionQuantifier {
299- RegExpPlus ( ) { this .getQualifier ( ) .charAt ( 0 ) = "+" }
302+ RegExpPlus ( ) { this .getquantifier ( ) .charAt ( 0 ) = "+" }
300303
301304 override string getPrimaryQLClass ( ) { result = "RegExpPlus" }
302305}
@@ -311,7 +314,7 @@ class RegExpPlus extends InfiniteRepetitionQuantifier {
311314 * ```
312315 */
313316class RegExpOpt extends RegExpQuantifier {
314- RegExpOpt ( ) { this .getQualifier ( ) .charAt ( 0 ) = "?" }
317+ RegExpOpt ( ) { this .getquantifier ( ) .charAt ( 0 ) = "?" }
315318
316319 override string getPrimaryQLClass ( ) { result = "RegExpOpt" }
317320}
@@ -333,10 +336,10 @@ class RegExpRange extends RegExpQuantifier {
333336
334337 RegExpRange ( ) { re .multiples ( part_end , end , lower , upper ) }
335338
336- /** Gets the string defining the upper bound of this range, if any . */
339+ /** Gets the string defining the upper bound of this range, which is empty when no such bound exists . */
337340 string getUpper ( ) { result = upper }
338341
339- /** Gets the string defining the lower bound of this range, if any . */
342+ /** Gets the string defining the lower bound of this range, which is empty when no such bound exists . */
340343 string getLower ( ) { result = lower }
341344
342345 /**
@@ -578,9 +581,6 @@ class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
578581 /** Holds if this character class is inverted, matching the opposite of its content. */
579582 predicate isInverted ( ) { re .getChar ( start + 1 ) = "^" }
580583
581- /** Gets the `i`th char inside this charater class. */
582- string getCharThing ( int i ) { result = re .getChar ( i + start ) }
583-
584584 /** Holds if this character class can match anything. */
585585 predicate isUniversalClass ( ) {
586586 // [^]
@@ -724,9 +724,9 @@ class RegExpConstant extends RegExpTerm {
724724 RegExpConstant ( ) {
725725 ( this = TRegExpNormalChar ( re , start , end ) or this = TRegExpQuote ( re , start , end ) ) and
726726 not this instanceof RegExpCharacterClassEscape and
727- // exclude chars in qualifiers
727+ // exclude chars in quantifiers
728728 // TODO: push this into regex library
729- not exists ( int qstart , int qend | re .qualifiedPart ( _, qstart , qend , _, _) |
729+ not exists ( int qstart , int qend | re .quantifiedPart ( _, qstart , qend , _, _) |
730730 qstart <= start and end <= qend
731731 ) and
732732 ( value = this .( RegExpNormalChar ) .getValue ( ) or value = this .( RegExpQuote ) .getValue ( ) )
0 commit comments