@@ -55,7 +55,7 @@ class RegExpParent extends TRegExpParent {
5555 string toString ( ) { result = "RegExpParent" }
5656
5757 /** Gets the `i`th child term. */
58- abstract RegExpTerm getChild ( int i ) ;
58+ RegExpTerm getChild ( int i ) { none ( ) } // No result by default; NOTE(review): subclasses that have children presumably override this - confirm.
5959
6060 /** Gets a child term . */
6161 RegExpTerm getAChild ( ) { result = this .getChild ( _) }
@@ -143,26 +143,6 @@ class RegExpTerm extends RegExpParent {
143143 */
144144 predicate isRootTerm ( ) { start = 0 and end = re .getText ( ) .length ( ) }
145145
146- override RegExpTerm getChild ( int i ) {
147- result = this .( RegExpAlt ) .getChild ( i )
148- or
149- result = this .( RegExpBackRef ) .getChild ( i )
150- or
151- result = this .( RegExpCharacterClass ) .getChild ( i )
152- or
153- result = this .( RegExpCharacterRange ) .getChild ( i )
154- or
155- result = this .( RegExpNormalChar ) .getChild ( i )
156- or
157- result = this .( RegExpGroup ) .getChild ( i )
158- or
159- result = this .( RegExpQuantifier ) .getChild ( i )
160- or
161- result = this .( RegExpSequence ) .getChild ( i )
162- or
163- result = this .( RegExpSpecialChar ) .getChild ( i )
164- }
165-
166146 /**
167147 * Gets the parent term of this regular expression term, or the
168148 * regular expression literal if this is the root term.
@@ -508,7 +488,7 @@ class RegExpEscape extends RegExpNormalChar {
508488 /**
509489 * Holds if this is a unicode escape.
510490 */
511- private predicate isUnicode ( ) { this .getText ( ) .matches ( "\\u%" ) }
491+ private predicate isUnicode() { exists(string text | text = this.getText() | text.matches("\\u%") or text.matches("\\x%")) } // `\u...` or `\x...`
512492
513493 /**
514494 * Gets the unicode char for this escape.
@@ -520,13 +500,24 @@ class RegExpEscape extends RegExpNormalChar {
520500 )
521501 }
522502
503+ /** Gets the hexadecimal digits of this escape: the text after `\u` or `\x`, or between the braces of `\x{...}`. */
504+ private string getHexString() {
505+   this.isUnicode() and
506+   exists(string text | text = this.getText() |
507+     // Only the `\x{h..h}` form is wrapped in braces; `\uhhhh` and `\xhh` just carry a two-character prefix.
508+     if text.matches("\\x{%")
509+     then result = text.substring(3, text.length() - 1)
510+     else result = text.suffix(2)
511+   )
512+ }
513+
523514 /**
524515 * Gets int value for the `index`th char in the hex number of the unicode escape.
525516 * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
526517 */
527518 private int getHexValueFromUnicode ( int index ) {
528519 this .isUnicode ( ) and
529- exists ( string hex , string char | hex = this .getText ( ) . suffix ( 2 ) |
520+ exists ( string hex , string char | hex = this .getHexString ( ) |
530521 char = hex .charAt ( index ) and
531522 result = 16 .pow ( hex .length ( ) - index - 1 ) * toHex ( char )
532523 )
@@ -574,6 +565,50 @@ class RegExpCharacterClassEscape extends RegExpEscape {
574565 override string getPrimaryQLClass ( ) { result = "RegExpCharacterClassEscape" }
575566}
576567
568+ /**
569+  * A character class escape that refers to a class by name.
570+  *
571+  * Examples:
572+  * ```
573+  * \p{Digit}
574+  * \p{IsLowerCase}
575+  * ```
576+  */
577+ class RegExpNamedProperty extends RegExpCharacterClassEscape {
578+   boolean inverted;
579+   string name;
580+
581+   RegExpNamedProperty() {
582+     exists(string value, string marker | value = this.getValue() and marker = value.charAt(0) |
583+       name = value.substring(2, value.length() - 1) and
584+       (
585+         marker = "p" and inverted = false
586+         or
587+         marker = "P" and inverted = true
588+       )
589+     )
590+   }
591+
592+   /** Holds if this class is inverted, that is, its value starts with an upper-case `P`. */
593+   predicate isInverted() { inverted = true }
594+
595+   /** Gets the name of this class: the value without its first two characters and its last character. */
596+   string getClassName() { result = name }
597+
598+   /**
599+    * Gets the single-character escape equivalent to this class (e.g. `d` for `\d`), if any, without the backslash.
600+    */
601+   string getBackslashEquivalent() {
602+     exists(string lower |
603+       name = ["Digit", "IsDigit"] and lower = "d"
604+       or
605+       name = ["Space", "IsWhite_Space"] and lower = "s"
606+     |
607+       (inverted = true and result = lower.toUpperCase() or inverted = false and result = lower)
608+     )
609+   }
610+ }
611+
577612/**
578613 * A character class in a regular expression.
579614 *
0 commit comments