Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 6dbeb81

Browse files
committed
Ruby: use AccessPathSyntax.qll to parse input/output summary specs
1 parent 0af9e8a commit 6dbeb81

5 files changed

Lines changed: 174 additions & 85 deletions

File tree

config/identical-files.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@
505505
"AccessPathSyntax": [
506506
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
507507
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
508-
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll"
508+
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
509+
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
509510
]
510511
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* Module for parsing access paths from CSV models, both the identifying access path used
3+
* by dynamic languages, and the input/output specifications for summary steps.
4+
*
5+
 * This file is used by the shared data flow library and by the JavaScript libraries
6+
 * (which do not use the shared data flow library).
7+
*/
8+
9+
/** Companion module to the `AccessPath` class. */
10+
module AccessPath {
11+
/** A string that should be parsed as an access path. */
12+
abstract class Range extends string {
13+
bindingset[this]
14+
Range() { any() }
15+
}
16+
}
17+
18+
/**
19+
* A string that occurs as an access path (either identifying or input/output spec)
20+
* which might be relevant for this database.
21+
*/
22+
class AccessPath extends string instanceof AccessPath::Range {
23+
/** Gets the `n`th token on the access path as a string. */
24+
string getRawToken(int n) {
25+
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
26+
// Instead use regexpFind to match valid tokens, and supplement with a final length
27+
// check to ensure all characters were included in a token.
28+
result = this.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
29+
}
30+
31+
/** Holds if this string is not a syntactically valid access path. */
32+
predicate hasSyntaxError() {
33+
    // If the lengths match, all characters must have been included in a token
34+
// or seen by the `.` lookahead pattern.
35+
this != "" and
36+
not this.length() = sum(int n | | getRawToken(n).length() + 1) - 1
37+
}
38+
39+
/** Gets the `n`th token on the access path (if there are no syntax errors). */
40+
AccessPathToken getToken(int n) {
41+
result = this.getRawToken(n) and
42+
not hasSyntaxError()
43+
}
44+
45+
/** Gets the number of tokens on the path (if there are no syntax errors). */
46+
int getNumToken() {
47+
result = count(int n | exists(this.getRawToken(n))) and
48+
not hasSyntaxError()
49+
}
50+
51+
/** Gets the `n`th-last token, with 0 being the last token. */
52+
AccessPathToken getLastToken(int n) { result = getToken(getNumToken() - 1 - n) }
53+
}
54+
55+
/**
56+
* An access path that uses `A of B` syntax, which should now be written as `B.A`.
57+
*
58+
* This is a compatibility layer to help test at checkpoints during transition to the new syntax.
59+
*/
60+
private class LegacyAccessPath extends AccessPath {
61+
LegacyAccessPath() { this.matches("% of %") }
62+
63+
private string getRawSplit(int n) { result = this.splitAt(" of ", n) }
64+
65+
private int getNumRawSplits() { result = strictcount(int n | exists(getRawSplit(n))) }
66+
67+
override string getRawToken(int n) { result = getRawSplit(getNumRawSplits() - n - 1) }
68+
69+
override predicate hasSyntaxError() { none() }
70+
}
71+
72+
/**
73+
 * An access path token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
74+
*/
75+
class AccessPathToken extends string {
76+
AccessPathToken() { this = any(AccessPath path).getRawToken(_) }
77+
78+
private string getPart(int part) {
79+
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
80+
}
81+
82+
/** Gets the name of the token, such as `Member` from `Member[x]` */
83+
string getName() { result = this.getPart(1) }
84+
85+
/**
86+
* Gets the argument list, such as `1,2` from `Member[1,2]`,
87+
* or has no result if there are no arguments.
88+
*/
89+
string getArgumentList() { result = this.getPart(2) }
90+
91+
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
92+
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n) }
93+
94+
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
95+
string getAnArgument() { result = this.getArgument(_) }
96+
97+
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
98+
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
99+
}

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll

Lines changed: 64 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ module Public {
228228
*/
229229
module Private {
230230
private import Public
231+
import AccessPathSyntax
231232

232233
newtype TSummaryComponent =
233234
TContentSummaryComponent(Content c) or
@@ -811,69 +812,46 @@ module Private {
811812
sinkElement(_, spec, _)
812813
}
813814

814-
/** Holds if the `n`th component of specification `s` is `c`. */
815-
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
816-
817-
/** Holds if specification `s` has length `len`. */
818-
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
819-
820-
/** Gets the last component of specification `s`. */
821-
string specLast(string s) {
822-
exists(int len |
823-
specLength(s, len) and
824-
specSplit(s, result, len - 1)
825-
)
815+
private class AccessPathRange extends AccessPath::Range {
816+
AccessPathRange() { relevantSpec(this) }
826817
}
827818

828819
    /** Holds if specification component `token` parses as parameter `pos`. */
829-
predicate parseParam(string c, ArgumentPosition pos) {
830-
specSplit(_, c, _) and
831-
exists(string body |
832-
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1) and
833-
pos = parseParamBody(body)
834-
)
820+
predicate parseParam(AccessPathToken token, ArgumentPosition pos) {
821+
token.getName() = "Parameter" and
822+
pos = parseParamBody(token.getAnArgument())
835823
}
836824

837825
    /** Holds if specification component `token` parses as argument `pos`. */
838-
predicate parseArg(string c, ParameterPosition pos) {
839-
specSplit(_, c, _) and
840-
exists(string body |
841-
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1) and
842-
pos = parseArgBody(body)
843-
)
826+
predicate parseArg(AccessPathToken token, ParameterPosition pos) {
827+
token.getName() = "Argument" and
828+
pos = parseArgBody(token.getAnArgument())
844829
}
845830

846-
private SummaryComponent interpretComponent(string c) {
847-
specSplit(_, c, _) and
848-
(
849-
exists(ParameterPosition pos |
850-
parseArg(c, pos) and result = SummaryComponent::argument(pos)
851-
)
852-
or
853-
exists(ArgumentPosition pos |
854-
parseParam(c, pos) and result = SummaryComponent::parameter(pos)
855-
)
856-
or
857-
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
858-
or
859-
result = interpretComponentSpecific(c)
831+
private SummaryComponent interpretComponent(AccessPathToken token) {
832+
exists(ParameterPosition pos |
833+
parseArg(token, pos) and result = SummaryComponent::argument(pos)
860834
)
835+
or
836+
exists(ArgumentPosition pos |
837+
parseParam(token, pos) and result = SummaryComponent::parameter(pos)
838+
)
839+
or
840+
token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
841+
or
842+
result = interpretComponentSpecific(token)
861843
}
862844

863845
/**
864846
* Holds if `spec` specifies summary component stack `stack`.
865847
*/
866-
predicate interpretSpec(string spec, SummaryComponentStack stack) {
848+
predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) {
867849
interpretSpec(spec, 0, stack)
868850
}
869851

870-
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
871-
exists(string c |
872-
relevantSpec(spec) and
873-
specLength(spec, idx + 1) and
874-
specSplit(spec, c, idx) and
875-
stack = SummaryComponentStack::singleton(interpretComponent(c))
876-
)
852+
private predicate interpretSpec(AccessPath spec, int idx, SummaryComponentStack stack) {
853+
idx = spec.getNumToken() - 1 and
854+
stack = SummaryComponentStack::singleton(interpretComponent(spec.getLastToken(idx)))
877855
or
878856
exists(SummaryComponent head, SummaryComponentStack tail |
879857
interpretSpec(spec, idx, head, tail) and
@@ -882,13 +860,10 @@ module Private {
882860
}
883861

884862
private predicate interpretSpec(
885-
string output, int idx, SummaryComponent head, SummaryComponentStack tail
863+
AccessPath output, int idx, SummaryComponent head, SummaryComponentStack tail
886864
) {
887-
exists(string c |
888-
interpretSpec(output, idx + 1, tail) and
889-
specSplit(output, c, idx) and
890-
head = interpretComponent(c)
891-
)
865+
interpretSpec(output, idx + 1, tail) and
866+
head = interpretComponent(output.getLastToken(idx))
892867
}
893868

894869
private class MkStack extends RequiredSummaryComponentStack {
@@ -903,7 +878,7 @@ module Private {
903878
override predicate propagatesFlow(
904879
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
905880
) {
906-
exists(string inSpec, string outSpec, string kind |
881+
exists(AccessPath inSpec, AccessPath outSpec, string kind |
907882
summaryElement(this, inSpec, outSpec, kind) and
908883
interpretSpec(inSpec, input) and
909884
interpretSpec(outSpec, output)
@@ -916,50 +891,55 @@ module Private {
916891
}
917892

918893
/** Holds if component `c` of specification `spec` cannot be parsed. */
919-
predicate invalidSpecComponent(string spec, string c) {
920-
specSplit(spec, c, _) and
894+
predicate invalidSpecComponent(AccessPath spec, string c) {
895+
c = spec.getRawToken(_) and
921896
not exists(interpretComponent(c))
922897
}
923898

924-
private predicate inputNeedsReference(string c) {
925-
c = "Argument" or
926-
parseArg(c, _) or
899+
private predicate inputNeedsReference(AccessPathToken c) {
900+
c.getName() = "Argument" or
927901
inputNeedsReferenceSpecific(c)
928902
}
929903

930-
private predicate outputNeedsReference(string c) {
931-
c = "Argument" or
932-
parseArg(c, _) or
933-
c = "ReturnValue" or
904+
private predicate outputNeedsReference(AccessPathToken c) {
905+
c.getName() = ["Argument", "ReturnValue"] or
934906
outputNeedsReferenceSpecific(c)
935907
}
936908

937-
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
909+
private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) {
938910
exists(SourceOrSinkElement e |
939911
sourceElement(e, output, kind) and
940-
if outputNeedsReference(specLast(output))
912+
if outputNeedsReference(output.getToken(0))
941913
then e = ref.getCallTarget()
942914
else e = ref.asElement()
943915
)
944916
}
945917

946-
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
918+
private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) {
947919
exists(SourceOrSinkElement e |
948920
sinkElement(e, input, kind) and
949-
if inputNeedsReference(specLast(input))
921+
if inputNeedsReference(input.getToken(0))
950922
then e = ref.getCallTarget()
951923
else e = ref.asElement()
952924
)
953925
}
954926

955-
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
927+
private predicate interpretOutput(
928+
AccessPath output, int idx, InterpretNode ref, InterpretNode node
929+
) {
956930
sourceElementRef(ref, output, _) and
957-
specLength(output, idx) and
958-
node = ref
931+
idx = output.getNumToken() and
932+
(
933+
if output = ""
934+
then
935+
// Allow language-specific interpretation of the empty access path
936+
interpretOutputSpecific("", ref, node)
937+
else node = ref
938+
)
959939
or
960-
exists(InterpretNode mid, string c |
940+
exists(InterpretNode mid, AccessPathToken c |
961941
interpretOutput(output, idx + 1, ref, mid) and
962-
specSplit(output, c, idx)
942+
c = output.getLastToken(idx)
963943
|
964944
exists(ArgumentPosition apos, ParameterPosition ppos |
965945
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@@ -982,14 +962,22 @@ module Private {
982962
)
983963
}
984964

985-
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
965+
private predicate interpretInput(
966+
AccessPath input, int idx, InterpretNode ref, InterpretNode node
967+
) {
986968
sinkElementRef(ref, input, _) and
987-
specLength(input, idx) and
988-
node = ref
969+
idx = input.getNumToken() and
970+
(
971+
if input = ""
972+
then
973+
// Allow language-specific interpretation of the empty access path
974+
interpretInputSpecific("", ref, node)
975+
else node = ref
976+
)
989977
or
990-
exists(InterpretNode mid, string c |
978+
exists(InterpretNode mid, AccessPathToken c |
991979
interpretInput(input, idx + 1, ref, mid) and
992-
specSplit(input, c, idx)
980+
c = input.getLastToken(idx)
993981
|
994982
exists(ArgumentPosition apos, ParameterPosition ppos |
995983
node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and

ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImplSpecific.qll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,16 +159,16 @@ module ParsePositions {
159159
private import FlowSummaryImpl
160160

161161
private predicate isParamBody(string body) {
162-
exists(string c |
163-
Private::External::specSplit(_, c, _) and
164-
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1)
162+
exists(AccessPathToken tok |
163+
tok.getName() = "Parameter" and
164+
body = tok.getAnArgument()
165165
)
166166
}
167167

168168
private predicate isArgBody(string body) {
169-
exists(string c |
170-
Private::External::specSplit(_, c, _) and
171-
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1)
169+
exists(AccessPathToken tok |
170+
tok.getName() = "Argument" and
171+
body = tok.getAnArgument()
172172
)
173173
}
174174

ruby/ql/test/library-tests/dataflow/summaries/Summaries.ql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,16 @@ import codeql.ruby.dataflow.FlowSummary
77
import DataFlow::PathGraph
88
import codeql.ruby.TaintTracking
99
import codeql.ruby.dataflow.internal.FlowSummaryImpl
10+
import codeql.ruby.dataflow.internal.AccessPathSyntax
1011

1112
query predicate invalidSpecComponent(SummarizedCallable sc, string s, string c) {
1213
(sc.propagatesFlowExt(s, _, _) or sc.propagatesFlowExt(_, s, _)) and
1314
Private::External::invalidSpecComponent(s, c)
1415
}
1516

16-
query predicate invalidOutputSpecComponent(SummarizedCallable sc, string s, string c) {
17+
query predicate invalidOutputSpecComponent(SummarizedCallable sc, AccessPath s, AccessPathToken c) {
1718
sc.propagatesFlowExt(_, s, _) and
18-
Private::External::specSplit(s, c, _) and
19+
c = s.getToken(_) and
1920
  c = "ArrayElement" // not allowed in output specs; use `ArrayElement[?]` instead
2021
}
2122

0 commit comments

Comments
 (0)