Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 753c557

Browse files
committed
Java: use AccessPathSyntax.qll to parse input/output summary specs
1 parent 53935db commit 753c557

6 files changed

Lines changed: 187 additions & 89 deletions

File tree

config/identical-files.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,5 +501,9 @@
501501
"javascript/ql/lib/tutorial.qll",
502502
"python/ql/lib/tutorial.qll",
503503
"ruby/ql/lib/tutorial.qll"
504+
],
505+
"AccessPathSyntax": [
506+
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
507+
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll"
504508
]
505509
}

java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ private import semmle.code.java.dataflow.DataFlow::DataFlow
6969
private import internal.DataFlowPrivate
7070
private import internal.FlowSummaryImpl::Private::External
7171
private import internal.FlowSummaryImplSpecific
72+
private import internal.AccessPathSyntax
7273
private import FlowSummary
7374

7475
/**
@@ -557,7 +558,7 @@ module CsvValidation {
557558
not (part = "Argument" and pred = "sink") and
558559
not parseArg(part, _)
559560
or
560-
part = specLast(input) and
561+
part = input.(AccessPath).getToken(0) and
561562
parseParam(part, _)
562563
) and
563564
msg = "Unrecognized input specification \"" + part + "\" in " + pred + " model."
@@ -665,13 +666,16 @@ Element interpretElement(
665666
)
666667
}
667668

668-
private predicate parseField(string c, FieldContent f) {
669-
specSplit(_, c, _) and
670-
exists(string fieldRegex, string package, string className, string fieldName |
671-
fieldRegex = "^Field\\[(.*)\\.([^.]+)\\.([^.]+)\\]$" and
672-
package = c.regexpCapture(fieldRegex, 1) and
673-
className = c.regexpCapture(fieldRegex, 2) and
674-
fieldName = c.regexpCapture(fieldRegex, 3) and
669+
private predicate parseField(AccessPathToken c, FieldContent f) {
670+
exists(
671+
string fieldRegex, string qualifiedName, string package, string className, string fieldName
672+
|
673+
c.getName() = "Field" and
674+
qualifiedName = c.getAnArgument() and
675+
fieldRegex = "^(.*)\\.([^.]+)\\.([^.]+)$" and
676+
package = qualifiedName.regexpCapture(fieldRegex, 1) and
677+
className = qualifiedName.regexpCapture(fieldRegex, 2) and
678+
fieldName = qualifiedName.regexpCapture(fieldRegex, 3) and
675679
f.getField().hasQualifiedName(package, className, fieldName)
676680
)
677681
}
@@ -687,13 +691,13 @@ class SyntheticField extends string {
687691
Type getType() { result instanceof TypeObject }
688692
}
689693

690-
private predicate parseSynthField(string c, string f) {
691-
specSplit(_, c, _) and
692-
c.regexpCapture("SyntheticField\\[([.a-zA-Z0-9$]+)\\]", 1) = f
694+
private predicate parseSynthField(AccessPathToken c, string f) {
695+
c.getName() = "SyntheticField" and
696+
f = c.getAnArgument()
693697
}
694698

695699
/** Holds if the specification component parses as a `Content`. */
696-
predicate parseContent(string component, Content content) {
700+
predicate parseContent(AccessPathToken component, Content content) {
697701
parseField(component, content)
698702
or
699703
parseSynthField(component, content.(SyntheticFieldContent).getField())
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/**
2+
* Module for parsing access paths from CSV models, both the identifying access path used
3+
* by dynamic languages, and the input/output specifications for summary steps.
4+
*
5+
* This file is used by the shared data flow library and by the JavaScript libraries
6+
* (which do not use the shared data flow libraries).
7+
*/
8+
9+
/** Companion module to the `AccessPath` class. */
10+
module AccessPath {
11+
/** A string that should be parsed as an access path. */
12+
abstract class Range extends string {
13+
bindingset[this]
14+
Range() { any() }
15+
}
16+
}
17+
18+
/**
19+
* A string that occurs as an access path (either identifying or input/output spec)
20+
* which might be relevant for this database.
21+
*/
22+
class AccessPath extends string instanceof AccessPath::Range {
23+
/** Gets the `n`th token on the access path as a string. */
24+
string getRawToken(int n) {
25+
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
26+
// Instead use regexpFind to match valid tokens, and supplement with a final length
27+
// check to ensure all characters were included in a token.
28+
result = this.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
29+
}
30+
31+
/** Holds if this string is not a syntactically valid access path. */
32+
predicate hasSyntaxError() {
33+
// If the lengths match, all characters must have been included in a token
34+
// or seen by the `.` lookahead pattern.
35+
this != "" and
36+
not this.length() = sum(int n | | getRawToken(n).length() + 1) - 1
37+
}
38+
39+
/** Gets the `n`th token on the access path (if there are no syntax errors). */
40+
AccessPathToken getToken(int n) {
41+
result = this.getRawToken(n) and
42+
not hasSyntaxError()
43+
}
44+
45+
/** Gets the number of tokens on the path (if there are no syntax errors). */
46+
int getNumToken() {
47+
result = count(int n | exists(this.getRawToken(n))) and
48+
not hasSyntaxError()
49+
}
50+
51+
/** Gets the `n`th-last token, with 0 being the last token. */
52+
AccessPathToken getLastToken(int n) { result = getToken(getNumToken() - 1 - n) }
53+
}
54+
55+
/**
56+
* An access path that uses `A of B` syntax, which should now be written as `B.A`.
57+
*
58+
* This is a compatibility layer to help test at checkpoints during transition to the new syntax.
59+
*/
60+
private class LegacyAccessPath extends AccessPath {
61+
LegacyAccessPath() { this.matches("% of %") }
62+
63+
private string getRawSplit(int n) { result = this.splitAt(" of ", n) }
64+
65+
private int getNumRawSplits() { result = strictcount(int n | exists(getRawSplit(n))) }
66+
67+
override string getRawToken(int n) { result = getRawSplit(getNumRawSplits() - n - 1) }
68+
69+
override predicate hasSyntaxError() { none() }
70+
}
71+
72+
/**
73+
* An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
74+
*/
75+
class AccessPathToken extends string {
76+
AccessPathToken() { this = any(AccessPath path).getRawToken(_) }
77+
78+
private string getPart(int part) {
79+
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
80+
}
81+
82+
/** Gets the name of the token, such as `Member` from `Member[x]` */
83+
string getName() { result = this.getPart(1) }
84+
85+
/**
86+
* Gets the argument list, such as `1,2` from `Member[1,2]`,
87+
* or has no result if there are no arguments.
88+
*/
89+
string getArgumentList() { result = this.getPart(2) }
90+
91+
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
92+
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n) }
93+
94+
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
95+
string getAnArgument() { result = this.getArgument(_) }
96+
97+
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
98+
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
99+
}

java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll

Lines changed: 64 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ module Public {
228228
*/
229229
module Private {
230230
private import Public
231+
import AccessPathSyntax
231232

232233
newtype TSummaryComponent =
233234
TContentSummaryComponent(Content c) or
@@ -811,69 +812,46 @@ module Private {
811812
sinkElement(_, spec, _)
812813
}
813814

814-
/** Holds if the `n`th component of specification `s` is `c`. */
815-
predicate specSplit(string s, string c, int n) { relevantSpec(s) and s.splitAt(" of ", n) = c }
816-
817-
/** Holds if specification `s` has length `len`. */
818-
predicate specLength(string s, int len) { len = 1 + max(int n | specSplit(s, _, n)) }
819-
820-
/** Gets the last component of specification `s`. */
821-
string specLast(string s) {
822-
exists(int len |
823-
specLength(s, len) and
824-
specSplit(s, result, len - 1)
825-
)
815+
private class AccessPathRange extends AccessPath::Range {
816+
AccessPathRange() { relevantSpec(this) }
826817
}
827818

828819
/** Holds if specification component `token` parses as parameter `pos`. */
829-
predicate parseParam(string c, ArgumentPosition pos) {
830-
specSplit(_, c, _) and
831-
exists(string body |
832-
body = c.regexpCapture("Parameter\\[([^\\]]*)\\]", 1) and
833-
pos = parseParamBody(body)
834-
)
820+
predicate parseParam(AccessPathToken token, ArgumentPosition pos) {
821+
token.getName() = "Parameter" and
822+
pos = parseParamBody(token.getAnArgument())
835823
}
836824

837825
/** Holds if specification component `token` parses as argument `pos`. */
838-
predicate parseArg(string c, ParameterPosition pos) {
839-
specSplit(_, c, _) and
840-
exists(string body |
841-
body = c.regexpCapture("Argument\\[([^\\]]*)\\]", 1) and
842-
pos = parseArgBody(body)
843-
)
826+
predicate parseArg(AccessPathToken token, ParameterPosition pos) {
827+
token.getName() = "Argument" and
828+
pos = parseArgBody(token.getAnArgument())
844829
}
845830

846-
private SummaryComponent interpretComponent(string c) {
847-
specSplit(_, c, _) and
848-
(
849-
exists(ParameterPosition pos |
850-
parseArg(c, pos) and result = SummaryComponent::argument(pos)
851-
)
852-
or
853-
exists(ArgumentPosition pos |
854-
parseParam(c, pos) and result = SummaryComponent::parameter(pos)
855-
)
856-
or
857-
c = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
858-
or
859-
result = interpretComponentSpecific(c)
831+
private SummaryComponent interpretComponent(AccessPathToken token) {
832+
exists(ParameterPosition pos |
833+
parseArg(token, pos) and result = SummaryComponent::argument(pos)
860834
)
835+
or
836+
exists(ArgumentPosition pos |
837+
parseParam(token, pos) and result = SummaryComponent::parameter(pos)
838+
)
839+
or
840+
token = "ReturnValue" and result = SummaryComponent::return(getReturnValueKind())
841+
or
842+
result = interpretComponentSpecific(token)
861843
}
862844

863845
/**
864846
* Holds if `spec` specifies summary component stack `stack`.
865847
*/
866-
predicate interpretSpec(string spec, SummaryComponentStack stack) {
848+
predicate interpretSpec(AccessPath spec, SummaryComponentStack stack) {
867849
interpretSpec(spec, 0, stack)
868850
}
869851

870-
private predicate interpretSpec(string spec, int idx, SummaryComponentStack stack) {
871-
exists(string c |
872-
relevantSpec(spec) and
873-
specLength(spec, idx + 1) and
874-
specSplit(spec, c, idx) and
875-
stack = SummaryComponentStack::singleton(interpretComponent(c))
876-
)
852+
private predicate interpretSpec(AccessPath spec, int idx, SummaryComponentStack stack) {
853+
idx = spec.getNumToken() - 1 and
854+
stack = SummaryComponentStack::singleton(interpretComponent(spec.getLastToken(idx)))
877855
or
878856
exists(SummaryComponent head, SummaryComponentStack tail |
879857
interpretSpec(spec, idx, head, tail) and
@@ -882,13 +860,10 @@ module Private {
882860
}
883861

884862
private predicate interpretSpec(
885-
string output, int idx, SummaryComponent head, SummaryComponentStack tail
863+
AccessPath output, int idx, SummaryComponent head, SummaryComponentStack tail
886864
) {
887-
exists(string c |
888-
interpretSpec(output, idx + 1, tail) and
889-
specSplit(output, c, idx) and
890-
head = interpretComponent(c)
891-
)
865+
interpretSpec(output, idx + 1, tail) and
866+
head = interpretComponent(output.getLastToken(idx))
892867
}
893868

894869
private class MkStack extends RequiredSummaryComponentStack {
@@ -903,7 +878,7 @@ module Private {
903878
override predicate propagatesFlow(
904879
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
905880
) {
906-
exists(string inSpec, string outSpec, string kind |
881+
exists(AccessPath inSpec, AccessPath outSpec, string kind |
907882
summaryElement(this, inSpec, outSpec, kind) and
908883
interpretSpec(inSpec, input) and
909884
interpretSpec(outSpec, output)
@@ -916,50 +891,55 @@ module Private {
916891
}
917892

918893
/** Holds if component `c` of specification `spec` cannot be parsed. */
919-
predicate invalidSpecComponent(string spec, string c) {
920-
specSplit(spec, c, _) and
894+
predicate invalidSpecComponent(AccessPath spec, string c) {
895+
c = spec.getRawToken(_) and
921896
not exists(interpretComponent(c))
922897
}
923898

924-
private predicate inputNeedsReference(string c) {
925-
c = "Argument" or
926-
parseArg(c, _) or
899+
private predicate inputNeedsReference(AccessPathToken c) {
900+
c.getName() = "Argument" or
927901
inputNeedsReferenceSpecific(c)
928902
}
929903

930-
private predicate outputNeedsReference(string c) {
931-
c = "Argument" or
932-
parseArg(c, _) or
933-
c = "ReturnValue" or
904+
private predicate outputNeedsReference(AccessPathToken c) {
905+
c.getName() = ["Argument", "ReturnValue"] or
934906
outputNeedsReferenceSpecific(c)
935907
}
936908

937-
private predicate sourceElementRef(InterpretNode ref, string output, string kind) {
909+
private predicate sourceElementRef(InterpretNode ref, AccessPath output, string kind) {
938910
exists(SourceOrSinkElement e |
939911
sourceElement(e, output, kind) and
940-
if outputNeedsReference(specLast(output))
912+
if outputNeedsReference(output.getToken(0))
941913
then e = ref.getCallTarget()
942914
else e = ref.asElement()
943915
)
944916
}
945917

946-
private predicate sinkElementRef(InterpretNode ref, string input, string kind) {
918+
private predicate sinkElementRef(InterpretNode ref, AccessPath input, string kind) {
947919
exists(SourceOrSinkElement e |
948920
sinkElement(e, input, kind) and
949-
if inputNeedsReference(specLast(input))
921+
if inputNeedsReference(input.getToken(0))
950922
then e = ref.getCallTarget()
951923
else e = ref.asElement()
952924
)
953925
}
954926

955-
private predicate interpretOutput(string output, int idx, InterpretNode ref, InterpretNode node) {
927+
private predicate interpretOutput(
928+
AccessPath output, int idx, InterpretNode ref, InterpretNode node
929+
) {
956930
sourceElementRef(ref, output, _) and
957-
specLength(output, idx) and
958-
node = ref
931+
idx = output.getNumToken() and
932+
(
933+
if output = ""
934+
then
935+
// Allow language-specific interpretation of the empty access path
936+
interpretOutputSpecific("", ref, node)
937+
else node = ref
938+
)
959939
or
960-
exists(InterpretNode mid, string c |
940+
exists(InterpretNode mid, AccessPathToken c |
961941
interpretOutput(output, idx + 1, ref, mid) and
962-
specSplit(output, c, idx)
942+
c = output.getLastToken(idx)
963943
|
964944
exists(ArgumentPosition apos, ParameterPosition ppos |
965945
node.asNode().(PostUpdateNode).getPreUpdateNode().(ArgNode).argumentOf(mid.asCall(), apos) and
@@ -982,14 +962,22 @@ module Private {
982962
)
983963
}
984964

985-
private predicate interpretInput(string input, int idx, InterpretNode ref, InterpretNode node) {
965+
private predicate interpretInput(
966+
AccessPath input, int idx, InterpretNode ref, InterpretNode node
967+
) {
986968
sinkElementRef(ref, input, _) and
987-
specLength(input, idx) and
988-
node = ref
969+
idx = input.getNumToken() and
970+
(
971+
if input = ""
972+
then
973+
// Allow language-specific interpretation of the empty access path
974+
interpretInputSpecific("", ref, node)
975+
else node = ref
976+
)
989977
or
990-
exists(InterpretNode mid, string c |
978+
exists(InterpretNode mid, AccessPathToken c |
991979
interpretInput(input, idx + 1, ref, mid) and
992-
specSplit(input, c, idx)
980+
c = input.getLastToken(idx)
993981
|
994982
exists(ArgumentPosition apos, ParameterPosition ppos |
995983
node.asNode().(ArgNode).argumentOf(mid.asCall(), apos) and

0 commit comments

Comments
 (0)