@@ -3,6 +3,7 @@ private import AST
33private import Constant
44private import TreeSitter
55private import codeql.ruby.controlflow.CfgNodes
6+ private import codeql.NumberUtils
67
78int parseInteger ( Ruby:: Integer i ) {
89 exists ( string s | s = i .getValue ( ) .toLowerCase ( ) .replaceAll ( "_" , "" ) |
@@ -148,16 +149,85 @@ private class RequiredFileLiteralConstantValue extends RequiredConstantValue {
148149
/**
 * Contributes required strings derived from non-regexp string text components:
 * the value of each such token, passed through `unescapeTextComponent`.
 */
private class RequiredStringTextComponentConstantValue extends RequiredConstantValue {
  /** Holds if `s` is the unescaped value of some non-regexp string text component token. */
  override predicate requiredString(string s) {
    exists(Ruby::Token textToken |
      // Only tokens that back a `TStringTextComponentNonRegexp` node are considered.
      exists(TStringTextComponentNonRegexp(textToken))
    |
      s = unescapeTextComponent(textToken.getValue())
    )
  }
}
154156
/**
 * Contributes required strings derived from non-regexp escape sequence components:
 * the value of each such token, passed through `unescapeEscapeSequence`.
 */
private class RequiredStringEscapeSequenceComponentConstantValue extends RequiredConstantValue {
  /** Holds if `s` is the unescaped value of some non-regexp escape sequence component token. */
  override predicate requiredString(string s) {
    exists(Ruby::Token escapeToken |
      // Only tokens that back a `TStringEscapeSequenceComponentNonRegexp` node are considered.
      exists(TStringEscapeSequenceComponentNonRegexp(escapeToken))
    |
      s = unescapeEscapeSequence(escapeToken.getValue())
    )
  }
}
160164
/**
 * Gets the string that the escape sequence `escaped` stands for. `escaped` is
 * the full sequence, including its leading backslash. For example:
 *
 * ```
 * \\     => \
 * \141   => a
 * \u0078 => x
 * ```
 *
 * Sequences recognised by `unescapeKnownEscapeSequence` are translated by it;
 * for every other input the result is `escaped` with its first character
 * removed.
 *
 * NOTE(review): control/meta escapes other than `\c?` and `\C-?` (for example
 * `\cx` or `\M-x`) are not in the known list, so they take the generic branch
 * and yield e.g. `cx` - confirm whether such tokens can reach this predicate.
 */
bindingset[escaped]
string unescapeEscapeSequence(string escaped) {
  if exists(unescapeKnownEscapeSequence(escaped))
  then result = unescapeKnownEscapeSequence(escaped)
  else
    // Any other character following a backslash is just that character.
    result = escaped.suffix(1)
}
182+
/**
 * Gets the string denoted by `escaped` when it is one of the escape sequences
 * enumerated below (including the leading backslash). Has no result when none
 * of the cases applies.
 */
bindingset[escaped]
private string unescapeKnownEscapeSequence(string escaped) {
  // Sequences whose replacement is written directly as a string literal.
  escaped = "\\\\" and result = "\\"
  or
  escaped = "\\'" and result = "'"
  or
  escaped = "\\\"" and result = "\""
  or
  escaped = "\\t" and result = "\t"
  or
  escaped = "\\n" and result = "\n"
  or
  escaped = "\\r" and result = "\r"
  or
  escaped = "\\s" and result = " "
  or
  // Sequences whose replacement is given as a code point.
  exists(int codePoint | result = codePoint.toUnicode() |
    escaped = "\\a" and codePoint = 7
    or
    escaped = "\\b" and codePoint = 8
    or
    escaped = "\\v" and codePoint = 11
    or
    escaped = "\\f" and codePoint = 12
    or
    escaped = "\\e" and codePoint = 27
    or
    escaped = ["\\c?", "\\C-?"] and codePoint = 127
    or
    // Octal: a backslash followed by one to three octal digits.
    codePoint = parseOctalInt(escaped.regexpCapture("\\\\([0-7]{1,3})", 1))
    or
    // Hexadecimal: `\x` followed by one or two hex digits.
    codePoint = parseHexInt(escaped.regexpCapture("\\\\x([0-9a-fA-F]{1,2})", 1))
    or
    // Unicode: `\u` followed by exactly four hex digits.
    codePoint = parseHexInt(escaped.regexpCapture("\\\\u([0-9a-fA-F]{4})", 1))
    or
    // Unicode: `\u{...}` with one to six hex digits between the braces.
    codePoint = parseHexInt(escaped.regexpCapture("\\\\u\\{([0-9a-fA-F]{1,6})\\}", 1))
  )
}
219+
/**
 * Gets `text` with every occurrence of `\\` replaced by `\` and every
 * occurrence of `\'` replaced by `'`. A backslash followed by any other
 * character is left as it is.
 *
 * ```rb
 * 'foo\\bar \'baz\'' # foo\bar 'baz'
 * ```
 */
bindingset[text]
string unescapeTextComponent(string text) {
  exists(string escapedCharPattern |
    // A backslash followed by a quote or a backslash; group 1 is the escaped character.
    escapedCharPattern = "\\\\(['\\\\])"
  |
    result = text.regexpReplaceAll(escapedCharPattern, "$1")
  )
}
230+
/**
 * Alias for the union of `TStringTextComponentRegexp`,
 * `TStringEscapeSequenceComponentRegexp` and `TStringInterpolationComponentRegexp`.
 */
class TRegExpComponent =
  TStringTextComponentRegexp or
      TStringEscapeSequenceComponentRegexp or
      TStringInterpolationComponentRegexp;
0 commit comments