Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 80175a9

Browse files
authored
Python: Compiles and mostly passes tests
- add flowsummaries shared files - register in identical files - fix initial non-monotonic recursions - add DataFlowSourceCall - add resolvedCall - add SourceParameterNode failing tests: - 3/library-tests/with/test.ql
1 parent 1d10f14 commit 80175a9

35 files changed

Lines changed: 2301 additions & 72 deletions

config/identical-files.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@
7070
"DataFlow Java/C# Flow Summaries": [
7171
"java/ql/lib/semmle/code/java/dataflow/internal/FlowSummaryImpl.qll",
7272
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/FlowSummaryImpl.qll",
73-
"ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll"
73+
"ruby/ql/lib/codeql/ruby/dataflow/internal/FlowSummaryImpl.qll",
74+
"python/ql/lib/semmle/python/dataflow/new/internal/FlowSummaryImpl.qll"
7475
],
7576
"SsaReadPosition Java/C#": [
7677
"java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll",
@@ -515,7 +516,8 @@
515516
"csharp/ql/lib/semmle/code/csharp/dataflow/internal/AccessPathSyntax.qll",
516517
"java/ql/lib/semmle/code/java/dataflow/internal/AccessPathSyntax.qll",
517518
"javascript/ql/lib/semmle/javascript/frameworks/data/internal/AccessPathSyntax.qll",
518-
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll"
519+
"ruby/ql/lib/codeql/ruby/dataflow/internal/AccessPathSyntax.qll",
520+
"python/ql/lib/semmle/python/dataflow/new/internal/AccessPathSyntax.qll"
519521
],
520522
"IncompleteUrlSubstringSanitization": [
521523
"javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll",

python/ql/lib/semmle/python/Concepts.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ module HTTP {
828828
}
829829

830830
/** A parameter that will receive parts of the url when handling an incoming request. */
831-
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode {
831+
private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::SourceParameterNode {
832832
RequestHandler handler;
833833

834834
RoutedParameter() { this.getParameter() = handler.getARoutedParameter() }
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/** Provides classes and predicates for defining flow summaries. */
2+
3+
import python
4+
import semmle.python.dataflow.new.DataFlow
5+
private import internal.FlowSummaryImpl as Impl
6+
private import internal.DataFlowUtil
7+
private import internal.DataFlowPrivate
8+
9+
// import all instances below
10+
private module Summaries { }
11+
12+
class SummaryComponent = Impl::Public::SummaryComponent;
13+
14+
/** Provides predicates for constructing summary components. */
15+
module SummaryComponent {
16+
private import Impl::Public::SummaryComponent as SC
17+
18+
predicate parameter = SC::parameter/1;
19+
20+
predicate argument = SC::argument/1;
21+
22+
predicate content = SC::content/1;
23+
24+
/** Gets a summary component that represents a list element. */
25+
SummaryComponent listElement() { result = content(any(ListElementContent c)) }
26+
27+
/** Gets a summary component that represents the return value of a call. */
28+
SummaryComponent return() { result = SC::return(any(ReturnKind rk)) }
29+
}
30+
31+
class SummaryComponentStack = Impl::Public::SummaryComponentStack;
32+
33+
/** Provides predicates for constructing stacks of summary components. */
34+
module SummaryComponentStack {
35+
private import Impl::Public::SummaryComponentStack as SCS
36+
37+
predicate singleton = SCS::singleton/1;
38+
39+
predicate push = SCS::push/2;
40+
41+
predicate argument = SCS::argument/1;
42+
43+
/** Gets a singleton stack representing the return value of a call. */
44+
SummaryComponentStack return() { result = singleton(SummaryComponent::return()) }
45+
}
46+
47+
/** A callable with a flow summary, identified by a unique string. */
48+
abstract class SummarizedCallable extends LibraryCallable {
49+
bindingset[this]
50+
SummarizedCallable() { any() }
51+
52+
/**
53+
* Holds if data may flow from `input` to `output` through this callable.
54+
*
55+
* `preservesValue` indicates whether this is a value-preserving step
56+
* or a taint-step.
57+
*
58+
* Input specifications are restricted to stacks that end with
59+
* `SummaryComponent::argument(_)`, preceded by zero or more
60+
* `SummaryComponent::return()` or `SummaryComponent::content(_)` components.
61+
*
62+
* Output specifications are restricted to stacks that end with
63+
* `SummaryComponent::return()` or `SummaryComponent::argument(_)`.
64+
*
65+
* Output stacks ending with `SummaryComponent::return()` can be preceded by zero
66+
* or more `SummaryComponent::content(_)` components.
67+
*
68+
* Output stacks ending with `SummaryComponent::argument(_)` can be preceded by an
69+
* optional `SummaryComponent::parameter(_)` component, which in turn can be preceded
70+
* by zero or more `SummaryComponent::content(_)` components.
71+
*/
72+
pragma[nomagic]
73+
predicate propagatesFlow(
74+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
75+
) {
76+
none()
77+
}
78+
79+
/**
80+
* Same as
81+
*
82+
* ```ql
83+
* propagatesFlow(
84+
* SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
85+
* )
86+
* ```
87+
*
88+
* but uses an external (string) representation of the input and output stacks.
89+
*/
90+
pragma[nomagic]
91+
predicate propagatesFlowExt(string input, string output, boolean preservesValue) { none() }
92+
93+
/**
94+
* Holds if values stored inside `content` are cleared on objects passed as
95+
* the `i`th argument to this callable.
96+
*/
97+
pragma[nomagic]
98+
predicate clearsContent(int i, DataFlow::Content content) { none() }
99+
}
100+
101+
private class SummarizedCallableAdapter extends Impl::Public::SummarizedCallable {
102+
private SummarizedCallable sc;
103+
104+
SummarizedCallableAdapter() { this = TLibraryCallable(sc) }
105+
106+
final override predicate propagatesFlow(
107+
SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue
108+
) {
109+
sc.propagatesFlow(input, output, preservesValue)
110+
}
111+
112+
final override predicate clearsContent(ParameterPosition pos, DataFlow::Content content) {
113+
sc.clearsContent(pos, content)
114+
}
115+
}
116+
117+
class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack;

python/ql/lib/semmle/python/dataflow/new/SensitiveDataSources.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ private module SensitiveDataModeling {
305305
}
306306

307307
/** A parameter where the name indicates it will receive sensitive data. */
308-
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
308+
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::SourceParameterNode {
309309
SensitiveDataClassification classification;
310310

311311
SensitiveParameter() { this.getParameter().getName() = sensitiveString(classification) }
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
/**
2+
* Module for parsing access paths from CSV models, both the identifying access path used
3+
* by dynamic languages, and the input/output specifications for summary steps.
4+
*
5+
* This file is used by the shared data flow library and by the JavaScript libraries
6+
(which do not use the shared data flow libraries).
7+
*/
8+
9+
/**
10+
* Convenience-predicate for extracting two capture groups at once.
11+
*/
12+
bindingset[input, regexp]
13+
private predicate regexpCaptureTwo(string input, string regexp, string capture1, string capture2) {
14+
capture1 = input.regexpCapture(regexp, 1) and
15+
capture2 = input.regexpCapture(regexp, 2)
16+
}
17+
18+
/** Companion module to the `AccessPath` class. */
19+
module AccessPath {
20+
/** A string that should be parsed as an access path. */
21+
abstract class Range extends string {
22+
bindingset[this]
23+
Range() { any() }
24+
}
25+
26+
/**
27+
* Parses an integer constant `n` or interval `n1..n2` (inclusive) and gets the value
28+
* of the constant or any value contained in the interval.
29+
*/
30+
bindingset[arg]
31+
int parseInt(string arg) {
32+
result = arg.toInt()
33+
or
34+
// Match "n1..n2"
35+
exists(string lo, string hi |
36+
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.(-?\\d+)", lo, hi) and
37+
result = [lo.toInt() .. hi.toInt()]
38+
)
39+
}
40+
41+
/**
42+
* Parses a lower-bounded interval `n..` and gets the lower bound.
43+
*/
44+
bindingset[arg]
45+
private int parseLowerBound(string arg) {
46+
result = arg.regexpCapture("(-?\\d+)\\.\\.", 1).toInt()
47+
}
48+
49+
/**
50+
* Parses an integer constant or interval (bounded or unbounded) that explicitly
51+
* references the arity `N`, such as `N-1`, `N-3..N-1` or the open-ended `N-2..`.
52+
*
53+
* Note that expressions of form `N-x` will never resolve to a negative index,
54+
* even if `N` is zero (it will have no result in that case).
55+
*/
56+
bindingset[arg, arity]
57+
private int parseIntWithExplicitArity(string arg, int arity) {
58+
result >= 0 and // do not allow N-1 to resolve to a negative index
59+
exists(string lo |
60+
// N-x
61+
lo = arg.regexpCapture("N-(\\d+)", 1) and
62+
result = arity - lo.toInt()
63+
or
64+
// N-x.. (every index from N-x up to the last index N-1; a range, not a two-element set)
65+
lo = arg.regexpCapture("N-(\\d+)\\.\\.", 1) and
66+
result = [arity - lo.toInt() .. arity - 1]
67+
)
68+
or
69+
exists(string lo, string hi |
70+
// x..N-y
71+
regexpCaptureTwo(arg, "(-?\\d+)\\.\\.N-(\\d+)", lo, hi) and
72+
result = [lo.toInt() .. arity - hi.toInt()]
73+
or
74+
// N-x..N-y
75+
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.N-(\\d+)", lo, hi) and
76+
result = [arity - lo.toInt() .. arity - hi.toInt()] and
77+
result >= 0
78+
or
79+
// N-x..y
80+
regexpCaptureTwo(arg, "N-(\\d+)\\.\\.(\\d+)", lo, hi) and
81+
result = [arity - lo.toInt() .. hi.toInt()] and
82+
result >= 0
83+
)
84+
}
85+
86+
/**
87+
* Parses an integer constant or interval (bounded or unbounded) and gets any
88+
* of the integers contained within (of which there may be infinitely many).
89+
*
90+
* Has no result for arguments involving an explicit arity, such as `N-1`.
91+
*/
92+
bindingset[arg, result]
93+
int parseIntUnbounded(string arg) {
94+
result = parseInt(arg)
95+
or
96+
result >= parseLowerBound(arg)
97+
}
98+
99+
/**
100+
* Parses an integer constant or interval (bounded or unbounded) that
101+
* may reference the arity of a call, such as `N-1` or `N-3..N-1`.
102+
*
103+
* Note that expressions of form `N-x` will never resolve to a negative index,
104+
* even if `N` is zero (it will have no result in that case).
105+
*/
106+
bindingset[arg, arity]
107+
int parseIntWithArity(string arg, int arity) {
108+
result = parseInt(arg)
109+
or
110+
result in [parseLowerBound(arg) .. arity - 1]
111+
or
112+
result = parseIntWithExplicitArity(arg, arity)
113+
}
114+
}
115+
116+
/** Gets the `n`th token on the access path as a string. */
117+
private string getRawToken(AccessPath path, int n) {
118+
// Avoid splitting by '.' since tokens may contain dots, e.g. `Field[foo.Bar.x]`.
119+
// Instead use regexpFind to match valid tokens, and supplement with a final length
120+
// check (in `AccessPath.hasSyntaxError`) to ensure all characters were included in a token.
121+
result = path.regexpFind("\\w+(?:\\[[^\\]]*\\])?(?=\\.|$)", n, _)
122+
}
123+
124+
/**
125+
* A string that occurs as an access path (either identifying or input/output spec)
126+
* which might be relevant for this database.
127+
*/
128+
class AccessPath extends string instanceof AccessPath::Range {
129+
/** Holds if this string is not a syntactically valid access path. */
130+
predicate hasSyntaxError() {
131+
// If the lengths match, all characters must have been included in a token
132+
// or seen by the `.` lookahead pattern.
133+
this != "" and
134+
not this.length() = sum(int n | | getRawToken(this, n).length() + 1) - 1
135+
}
136+
137+
/** Gets the `n`th token on the access path (if there are no syntax errors). */
138+
AccessPathToken getToken(int n) {
139+
result = getRawToken(this, n) and
140+
not this.hasSyntaxError()
141+
}
142+
143+
/** Gets the number of tokens on the path (if there are no syntax errors). */
144+
int getNumToken() {
145+
result = count(int n | exists(getRawToken(this, n))) and
146+
not this.hasSyntaxError()
147+
}
148+
}
149+
150+
/**
151+
* An access part token such as `Argument[1]` or `ReturnValue`, appearing in one or more access paths.
152+
*/
153+
class AccessPathToken extends string {
154+
AccessPathToken() { this = getRawToken(any(AccessPath path), _) }
155+
156+
private string getPart(int part) {
157+
result = this.regexpCapture("([^\\[]+)(?:\\[([^\\]]*)\\])?", part)
158+
}
159+
160+
/** Gets the name of the token, such as `Member` from `Member[x]` */
161+
string getName() { result = this.getPart(1) }
162+
163+
/**
164+
* Gets the argument list, such as `1,2` from `Member[1,2]`,
165+
* or has no result if there are no arguments.
166+
*/
167+
string getArgumentList() { result = this.getPart(2) }
168+
169+
/** Gets the `n`th argument to this token, such as `x` or `y` from `Member[x,y]`. */
170+
string getArgument(int n) { result = this.getArgumentList().splitAt(",", n).trim() }
171+
172+
/** Gets an argument to this token, such as `x` or `y` from `Member[x,y]`. */
173+
string getAnArgument() { result = this.getArgument(_) }
174+
175+
/** Gets the number of arguments to this token, such as 2 for `Member[x,y]` or zero for `ReturnValue`. */
176+
int getNumArgument() { result = count(int n | exists(this.getArgument(n))) }
177+
}

0 commit comments

Comments
 (0)