github
diff --git a/‎ruby/ql/lib/codeql/NumberUtils.qll‎
Lines changed: 97 additions & 0 deletions b/‎ruby/ql/lib/codeql/NumberUtils.qll‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎ruby/ql/lib/codeql/ruby/ast/Literal.qll‎
Lines changed: 14 additions & 4 deletions b/‎ruby/ql/lib/codeql/ruby/ast/Literal.qll‎
Lines changed: 14 additions & 4 deletions
diff --git a/‎ruby/ql/lib/codeql/ruby/ast/internal/Literal.qll‎
Lines changed: 72 additions & 2 deletions b/‎ruby/ql/lib/codeql/ruby/ast/internal/Literal.qll‎
Lines changed: 72 additions & 2 deletions
diff --git a/‎ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll‎
Lines changed: 2 additions & 34 deletions b/‎ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll‎
Lines changed: 2 additions & 34 deletions
@@ -0,0 +1,97 @@
+/**
+ * Provides predicates for working with numeric values and their string
+ * representations.
+ */
+
+/**
+ * Gets the integer that the string `hex` denotes when read as a base-16
+ * number. Upper- and lower-case digits are both accepted.
+ *
+ * `hex` is expected to consist solely of hexadecimal digits. Strings longer
+ * than 6 characters have no result; the cap keeps the value inside the `int`
+ * range.
+ *
+ * ```
+ * "0"    => 0
+ * "FF"   => 255
+ * "f00d" => 61453
+ * ```
+ */
+bindingset[hex]
+int parseHexInt(string hex) {
+  exists(int len | len = hex.length() and len <= 6 |
+    result =
+      sum(int pos, string digit |
+        digit = hex.charAt(pos)
+      |
+        // Weight each digit by its place value, most significant digit first.
+        toHex(digit) * sixteenToThe(len - pos - 1)
+      )
+  )
+}
+
+/**
+ * Gets the integer that the string `octal` denotes when read as a base-8
+ * number.
+ *
+ * `octal` is expected to consist solely of octal digits. Strings longer than
+ * 10 characters have no result; the cap keeps the value inside the `int`
+ * range.
+ *
+ * ```
+ * "0"        => 0
+ * "77"       => 63
+ * "76543210" => 16434824
+ * ```
+ */
+bindingset[octal]
+int parseOctalInt(string octal) {
+  exists(int len | len = octal.length() and len <= 10 |
+    result =
+      sum(int pos, string digit |
+        digit = octal.charAt(pos)
+      |
+        // Weight each digit by its place value, most significant digit first.
+        toOctal(digit) * eightToThe(len - pos - 1)
+      )
+  )
+}
+
+/**
+ * Gets the numeric value (0 to 15) of the single hexadecimal digit `hex`.
+ * The letters `a` to `f` are accepted in either case.
+ */
+private int toHex(string hex) {
+  // A digit's value is its position in the digit alphabet; the second
+  // alphabet contributes the upper-case spellings of 10 to 15.
+  result = [0 .. 15] and
+  hex = ["0123456789abcdef", "0123456789ABCDEF"].charAt(result)
+}
+
+/** Gets the numeric value (0 to 7) of the single octal digit `octal`. */
+private int toOctal(string octal) {
+  // The digit is exactly the decimal rendering of its value.
+  result = [0 .. 7] and
+  octal = result.toString()
+}
+
+/**
+ * Gets 16 raised to the power `n`, for `n` between 0 and 7 inclusive.
+ * There is no result for any other `n`.
+ */
+int sixteenToThe(int n) {
+  // Multiplying by 16 is a 4-bit left shift, so 16**n is 1 shifted by 4*n bits.
+  // The range stops at 7: 16**7 is the largest power of 16 that fits in an int.
+  exists(int shift | n = [0 .. 7] and shift = 4 * n | result = 1.bitShiftLeft(shift))
+}
+
+/**
+ * Gets 8 raised to the power `n`, for `n` between 0 and 10 inclusive.
+ * There is no result for any other `n`.
+ */
+int eightToThe(int n) {
+  // Multiplying by 8 is a 3-bit left shift, so 8**n is 1 shifted by 3*n bits.
+  // The range stops at 10: 8**10 is the largest power of 8 that fits in an int.
+  exists(int shift | n = [0 .. 10] and shift = 3 * n | result = 1.bitShiftLeft(shift))
+}
@@ -230,13 +230,18 @@ class StringTextComponent extends StringComponent, TStringTextComponentNonRegexp
 
   StringTextComponent() { this = TStringTextComponentNonRegexp(g) }
 
-  final override string toString() { result = g.getValue() }
+  final override string toString() { result = this.getRawText() }
 
   final override ConstantValue::ConstantStringValue getConstantValue() {
-    result.isString(g.getValue())
+    result.isString(this.getUnescapedText())
   }
 
   final override string getAPrimaryQlClass() { result = "StringTextComponent" }
+
+  /** Gets the text of this component as it appears in the source code. */
+  final string getRawText() { result = g.getValue() }
+
+  final private string getUnescapedText() { result = unescapeTextComponent(this.getRawText()) }
 }
 
 /**
@@ -247,13 +252,18 @@ class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequen
 
   StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponentNonRegexp(g) }
 
-  final override string toString() { result = g.getValue() }
+  final override string toString() { result = this.getRawText() }
 
   final override ConstantValue::ConstantStringValue getConstantValue() {
-    result.isString(g.getValue())
+    result.isString(this.getUnescapedText())
   }
 
   final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" }
+
+  /** Gets the text of this component as it appears in the source code. */
+  final string getRawText() { result = g.getValue() }
+
+  final private string getUnescapedText() { result = unescapeEscapeSequence(this.getRawText()) }
 }
 
 /**
 
@@ -3,6 +3,7 @@ private import AST
 private import Constant
 private import TreeSitter
 private import codeql.ruby.controlflow.CfgNodes
+private import codeql.NumberUtils
 
 int parseInteger(Ruby::Integer i) {
   exists(string s | s = i.getValue().toLowerCase().replaceAll("_", "") |
@@ -148,16 +149,85 @@ private class RequiredFileLiteralConstantValue extends RequiredConstantValue {
 
+/**
+ * Supplies, via `requiredString`, the unescaped text of string text
+ * components: `s` is `unescapeTextComponent` applied to the value of any
+ * `Ruby::Token` for which a `TStringTextComponentNonRegexp` node exists.
+ */
 private class RequiredStringTextComponentConstantValue extends RequiredConstantValue {
   override predicate requiredString(string s) {
-    s = any(Ruby::Token t | exists(TStringTextComponentNonRegexp(t))).getValue()
+    s =
+      unescapeTextComponent(any(Ruby::Token t | exists(TStringTextComponentNonRegexp(t))).getValue())
   }
 }
 
+/**
+ * Supplies, via `requiredString`, the unescaped form of string escape
+ * sequence components: `s` is `unescapeEscapeSequence` applied to the value of
+ * any `Ruby::Token` for which a `TStringEscapeSequenceComponentNonRegexp` node
+ * exists.
+ */
 private class RequiredStringEscapeSequenceComponentConstantValue extends RequiredConstantValue {
   override predicate requiredString(string s) {
-    s = any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t))).getValue()
+    s =
+      unescapeEscapeSequence(any(Ruby::Token t | exists(TStringEscapeSequenceComponentNonRegexp(t)))
+            .getValue())
   }
 }
 
+/**
+ * Gets the string that the escape sequence `escaped` stands for. `escaped` is
+ * the sequence as written, including its leading backslash. For example:
+ *
+ * ```
+ * \\     => \
+ * \141   => a
+ * \u0078 => x
+ * ```
+ *
+ * A sequence that is not specifically recognized maps to `escaped` with its
+ * first character removed.
+ */
+bindingset[escaped]
+string unescapeEscapeSequence(string escaped) {
+  if exists(unescapeKnownEscapeSequence(escaped))
+  then result = unescapeKnownEscapeSequence(escaped)
+  else
+    // Any other character following a backslash is just that character.
+    result = escaped.suffix(1)
+}
+
+/**
+ * Gets the string for `escaped` when it is one of the specifically handled
+ * escape sequences: the single-character escapes listed below, `\c?` or
+ * `\C-?`, an octal escape of 1 to 3 digits, a `\x` escape of 1 or 2 hex
+ * digits, a `\u` escape of exactly 4 hex digits, or a `\u{...}` escape of 1 to
+ * 6 hex digits. Has no result when none of these cases applies.
+ */
+bindingset[escaped]
+private string unescapeKnownEscapeSequence(string escaped) {
+  // Escapes whose replacement is written directly as a string literal.
+  escaped = "\\\\" and result = "\\"
+  or
+  escaped = "\\'" and result = "'"
+  or
+  escaped = "\\\"" and result = "\""
+  or
+  escaped = "\\t" and result = "\t"
+  or
+  escaped = "\\n" and result = "\n"
+  or
+  escaped = "\\r" and result = "\r"
+  or
+  escaped = "\\s" and result = " "
+  or
+  // Escapes whose replacement is obtained from a code point.
+  exists(int codepoint | result = codepoint.toUnicode() |
+    escaped = "\\a" and codepoint = 7
+    or
+    escaped = "\\b" and codepoint = 8
+    or
+    escaped = "\\v" and codepoint = 11
+    or
+    escaped = "\\f" and codepoint = 12
+    or
+    escaped = "\\e" and codepoint = 27
+    or
+    escaped = ["\\c?", "\\C-?"] and codepoint = 127
+    or
+    codepoint = parseOctalInt(escaped.regexpCapture("\\\\([0-7]{1,3})", 1))
+    or
+    codepoint = parseHexInt(escaped.regexpCapture("\\\\x([0-9a-fA-F]{1,2})", 1))
+    or
+    codepoint = parseHexInt(escaped.regexpCapture("\\\\u([0-9a-fA-F]{4})", 1))
+    or
+    codepoint = parseHexInt(escaped.regexpCapture("\\\\u\\{([0-9a-fA-F]{1,6})\\}", 1))
+  )
+}
+
+/**
+ * Gets `text` with every `\\` rewritten to `\` and every `\'` rewritten to
+ * `'`. All other text is left unchanged.
+ *
+ * ```rb
+ * 'foo\\bar \'baz\'' # foo\bar 'baz'
+ * ```
+ */
+bindingset[text]
+string unescapeTextComponent(string text) {
+  exists(string escapedChar, string bareChar |
+    // A backslash followed by a quote or a backslash; group 1 captures the
+    // second character, which is all that is kept.
+    escapedChar = "\\\\(['\\\\])" and
+    bareChar = "$1"
+  |
+    result = text.regexpReplaceAll(escapedChar, bareChar)
+  )
+}
+
 class TRegExpComponent =
   TStringTextComponentRegexp or TStringEscapeSequenceComponentRegexp or
       TStringInterpolationComponentRegexp;
 
@@ -1,5 +1,6 @@
 private import codeql.ruby.ast.Literal as AST
 private import ParseRegExp
+private import codeql.NumberUtils
 import codeql.Locations
 private import codeql.ruby.DataFlow
 
@@ -423,48 +424,15 @@ class RegExpEscape extends RegExpNormalChar {
    * E.g. for `\u0061` this returns "a".
    */
   private string getUnicode() {
-    exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
-      result = codepoint.toUnicode()
-    )
-  }
-
-  /**
-   * Gets int value for the `index`th char in the hex number of the unicode escape.
-   * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
-   */
-  private int getHexValueFromUnicode(int index) {
     this.isUnicode() and
-    exists(string hex, string char | hex = this.getText().suffix(2) |
-      char = hex.charAt(index) and
-      result = 16.pow(hex.length() - index - 1) * toHex(char)
-    )
+    result = parseHexInt(this.getText().suffix(2)).toUnicode()
   }
 
   string getUnescaped() { result = this.getText().suffix(1) }
 
   override string getAPrimaryQlClass() { result = "RegExpEscape" }
 }
 
-/**
- * Gets the hex number for the `hex` char.
- */
-private int toHex(string hex) {
-  hex = [0 .. 9].toString() and
-  result = hex.toInt()
-  or
-  result = 10 and hex = ["a", "A"]
-  or
-  result = 11 and hex = ["b", "B"]
-  or
-  result = 12 and hex = ["c", "C"]
-  or
-  result = 13 and hex = ["d", "D"]
-  or
-  result = 14 and hex = ["e", "E"]
-  or
-  result = 15 and hex = ["f", "F"]
-}
-
 /**
  * A word boundary, that is, a regular expression term of the form `\b`.
  */