@@ -382,7 +382,7 @@ abstract class RegexString extends Expr {
382382 not c = "[" and
383383 not c = ")" and
384384 not c = "|" and
385- not this .qualifier ( start , _, _)
385+ not this .qualifier ( start , _, _, _ )
386386 )
387387 }
388388
@@ -688,41 +688,51 @@ abstract class RegexString extends Expr {
688688 this .backreference ( start , end )
689689 }
690690
// Holds if a qualifier spans `start` to `end`. A qualifier is a short qualifier
// (see `short_qualifier`) optionally followed by a single `?` character; when that
// `?` is present, `end` is placed after it. The new (+) version passes the extra
// `may_repeat_forever` column straight through to `short_qualifier`.
// NOTE(review): the first disjunct (short qualifier not followed by `?`) looks
// subsumed by the `else` branch of the second disjunct - confirm before simplifying.
691- private predicate qualifier ( int start , int end , boolean maybe_empty ) {
692- this .short_qualifier ( start , end , maybe_empty ) and not this .getChar ( end ) = "?"
691+ private predicate qualifier ( int start , int end , boolean maybe_empty , boolean may_repeat_forever ) {
692+ this .short_qualifier ( start , end , maybe_empty , may_repeat_forever ) and
693+ not this .getChar ( end ) = "?"
693694 or
694- exists ( int short_end | this .short_qualifier ( start , short_end , maybe_empty ) |
695+ exists ( int short_end | this .short_qualifier ( start , short_end , maybe_empty , may_repeat_forever ) |
695696 if this .getChar ( short_end ) = "?" then end = short_end + 1 else end = short_end
696697 )
697698 }
698699
// Holds if a qualifier without a trailing `?` suffix spans `start` to `end`.
// New (+) version: `+`, `*` and `?` are one character long (`end = start + 1`).
// `+` gives maybe_empty = false; `*` and `?` give maybe_empty = true.
// `+` and `*` give may_repeat_forever = true; `?` gives may_repeat_forever = false.
// Brace forms are delegated to `multiples`, replacing the removed (-) inline matching.
699- private predicate short_qualifier ( int start , int end , boolean maybe_empty ) {
700+ private predicate short_qualifier (
701+ int start , int end , boolean maybe_empty , boolean may_repeat_forever
702+ ) {
700703 (
701- this .getChar ( start ) = "+" and maybe_empty = false
704+ this .getChar ( start ) = "+" and maybe_empty = false and may_repeat_forever = true
702705 or
703- this .getChar ( start ) = "*" and maybe_empty = true
706+ this .getChar ( start ) = "*" and maybe_empty = true and may_repeat_forever = true
704707 or
705- this .getChar ( start ) = "?" and maybe_empty = true
708+ this .getChar ( start ) = "?" and maybe_empty = true and may_repeat_forever = false
706709 ) and
707710 end = start + 1
708711 or
709- exists ( int endin | end = endin + 1 |
710- this .getChar ( start ) = "{" and
711- this .getChar ( endin ) = "}" and
712- end > start and
713- exists ( string multiples | multiples = this .getText ( ) .substring ( start + 1 , endin ) |
714- multiples .regexpMatch ( "0+" ) and maybe_empty = true
715- or
716- multiples .regexpMatch ( "0*,[0-9]*" ) and maybe_empty = true
717- or
718- multiples .regexpMatch ( "0*[1-9][0-9]*" ) and maybe_empty = false
719- or
720- multiples .regexpMatch ( "0*[1-9][0-9]*,[0-9]*" ) and maybe_empty = false
721- ) and
722- not exists ( int mid |
723- this .getChar ( mid ) = "}" and
724- mid > start and
725- mid < endin
// Brace form: maybe_empty is true when the lower bound is omitted or parses to 0,
// and false otherwise; may_repeat_forever is true exactly when the upper bound is omitted.
712+ exists ( string lower , string upper |
713+ this .multiples ( start , end , lower , upper ) and
714+ ( if lower = "" or lower .toInt ( ) = 0 then maybe_empty = true else maybe_empty = false ) and
715+ if upper = "" then may_repeat_forever = true else may_repeat_forever = false
716+ )
717+ }
718+
719+ /**
720+ * Holds if a brace repetition quantifier spans `start` to `end`: the character at `start`
721+ * is `{`, the character at `end - 1` is `}`, and the text between them is either digits only
722+ * (then `lower` and `upper` are both that text) or digits, a comma, digits (then `lower` is the
723+ * text before the comma and `upper` the text after it). An omitted bound is the empty string.
723+ */
724+ predicate multiples ( int start , int end , string lower , string upper ) {
725+ this .getChar ( start ) = "{" and
726+ this .getChar ( end - 1 ) = "}" and
727+ exists ( string inner | inner = this .getText ( ) .substring ( start + 1 , end - 1 ) |
728+ inner .regexpMatch ( "[0-9]+" ) and
729+ lower = inner and
730+ upper = lower
731+ or
732+ inner .regexpMatch ( "[0-9]*,[0-9]*" ) and
733+ exists ( int commaIndex | commaIndex = inner .indexOf ( "," ) |
734+ lower = inner .prefix ( commaIndex ) and
735+ upper = inner .suffix ( commaIndex + 1 )
726736 )
727737 )
728738 }
@@ -731,19 +741,29 @@ abstract class RegexString extends Expr {
731741 * Whether the text in the range start,end is a qualified item, where item is a character,
732742 * a character set or a group.
733743 */
734- predicate qualifiedItem ( int start , int end , boolean maybe_empty ) {
735- this .qualifiedPart ( start , _, end , maybe_empty )
744+ predicate qualifiedItem ( int start , int end , boolean maybe_empty , boolean may_repeat_forever ) {
// Thin projection of `qualifiedPart`: the boundary between the item and its qualifier
// (`part_end`) is ignored; `maybe_empty` and `may_repeat_forever` are passed through unchanged.
745+ this .qualifiedPart ( start , _, end , maybe_empty , may_repeat_forever )
736746 }
737747
738- private predicate qualifiedPart ( int start , int part_end , int end , boolean maybe_empty ) {
748+ /**
749+ * Holds if a base item is found between `start` and `part_end` and its qualifier is
750+ * found between `part_end` and `end`.
751+ *
752+ * `maybe_empty` and `may_repeat_forever` are the values reported by `qualifier` for that
753+ * qualifier: whether the item is optional, and whether it may be repeated unboundedly.
754+ */
755+ predicate qualifiedPart (
756+ int start , int part_end , int end , boolean maybe_empty , boolean may_repeat_forever
757+ ) {
739758 this .baseItem ( start , part_end ) and
740- this .qualifier ( part_end , end , maybe_empty )
759+ this .qualifier ( part_end , end , maybe_empty , may_repeat_forever )
741760 }
742761
743- private predicate item ( int start , int end ) {
744- this .qualifiedItem ( start , end , _)
762+ /** Holds if the range `start`, `end` is an item: either a qualified item (a base item together with its qualifier), or a base item that is not followed by a qualifier. */
763+ predicate item ( int start , int end ) {
764+ this .qualifiedItem ( start , end , _, _)
745765 or
746- this .baseItem ( start , end ) and not this .qualifier ( end , _, _)
766+ this .baseItem ( start , end ) and not this .qualifier ( end , _, _, _ )
747767 }
748768
749769 private predicate subsequence ( int start , int end ) {
@@ -766,7 +786,7 @@ abstract class RegexString extends Expr {
766786 */
767787 predicate sequence ( int start , int end ) {
// A sequence is any `sequenceOrQualified` range that is not itself a qualified item;
// the two wildcards ignore the qualifier's `maybe_empty` and `may_repeat_forever` values.
768788 this .sequenceOrQualified ( start , end ) and
769- not this .qualifiedItem ( start , end , _)
789+ not this .qualifiedItem ( start , end , _, _ )
770790 }
771791
772792 private predicate sequenceOrQualified ( int start , int end ) {
@@ -777,7 +797,8 @@ abstract class RegexString extends Expr {
// Holds if `start` is the start offset of a character, a group or a character set.
// The new (+) version additionally accepts the start offset of a backreference.
777797 private predicate item_start ( int start ) {
778798 this .character ( start , _) or
779799 this .isGroupStart ( start ) or
780- this .charSet ( start , _)
800+ this .charSet ( start , _) or
801+ this .backreference ( start , _)
781802 }
782803
783804 private predicate item_end ( int end ) {
@@ -787,7 +808,7 @@ abstract class RegexString extends Expr {
787808 or
788809 this .charSet ( _, end )
789810 or
790- this .qualifier ( _, end , _)
811+ this .qualifier ( _, end , _, _ )
791812 }
792813
793814 private predicate top_level ( int start , int end ) {
@@ -839,14 +860,14 @@ abstract class RegexString extends Expr {
839860 or
840861 exists ( int x | this .firstPart ( x , end ) |
841862 this .emptyMatchAtStartGroup ( x , start ) or
842- this .qualifiedItem ( x , start , true ) or
863+ this .qualifiedItem ( x , start , true , _ ) or
843864 this .specialCharacter ( x , start , "^" )
844865 )
845866 or
846867 exists ( int y | this .firstPart ( start , y ) |
847868 this .item ( start , end )
848869 or
849- this .qualifiedPart ( start , end , y , _)
870+ this .qualifiedPart ( start , end , y , _, _ )
850871 )
851872 or
852873 exists ( int x , int y | this .firstPart ( x , y ) |
@@ -863,7 +884,7 @@ abstract class RegexString extends Expr {
863884 exists ( int y | this .lastPart ( start , y ) |
864885 this .emptyMatchAtEndGroup ( end , y )
865886 or
866- this .qualifiedItem ( end , y , true )
887+ this .qualifiedItem ( end , y , true , _ )
867888 or
868889 this .specialCharacter ( end , y , "$" )
869890 or
@@ -875,7 +896,7 @@ abstract class RegexString extends Expr {
875896 this .item ( start , end )
876897 )
877898 or
878- exists ( int y | this .lastPart ( start , y ) | this .qualifiedPart ( start , end , y , _) )
899+ exists ( int y | this .lastPart ( start , y ) | this .qualifiedPart ( start , end , y , _, _ ) )
879900 or
880901 exists ( int x , int y | this .lastPart ( x , y ) |
881902 this .groupContents ( x , y , start , end )
@@ -892,7 +913,7 @@ abstract class RegexString extends Expr {
892913 (
893914 this .character ( start , end )
894915 or
895- this .qualifiedItem ( start , end , _)
916+ this .qualifiedItem ( start , end , _, _ )
896917 or
897918 this .charSet ( start , end )
898919 ) and
@@ -907,7 +928,7 @@ abstract class RegexString extends Expr {
907928 (
908929 this .character ( start , end )
909930 or
910- this .qualifiedItem ( start , end , _)
931+ this .qualifiedItem ( start , end , _, _ )
911932 or
912933 this .charSet ( start , end )
913934 ) and
0 commit comments