Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 00ea0ce

Browse files
committed
Python: More Flask modeling kinda works
It "kinda" works now, but it really is not a pretty solution. Adding all these "tracked" objects is SUPER annoying... it _would_ be possible to skip them, but that seems like it will give the wrong edges for dataflow/taintflow queries :| A good chunk of it should be able to be removed with access-paths like C# does for library modeling. Some of it could be solved by a better type-tracking API like API Graphs... but it seems like we generally are just lacking the nice-to-have features like `.getAMemberCall` and the like. See https://github.com/github/codeql/pull/4082/files#diff-9aa94c4d713ef9d8da73918ff53db774L33
1 parent 3c08590 commit 00ea0ce

4 files changed

Lines changed: 212 additions & 45 deletions

File tree

python/ql/src/experimental/dataflow/internal/DataFlowUtil.qll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,28 @@ EssaNode importMember(string moduleName, string memberName) {
6565
result.getVar().(AssignmentDefinition).getSourceVariable() = var
6666
)
6767
}
68+
69+
/**
 * Class of list-like objects.
 *
 * This class is abstract: a node is only list-like if some concrete subclass says so.
 */
abstract class ListLike extends Node {
70+
/** Gets a Node that is an access of an element of this list-like object */
71+
Node getElementAccess() {
72+
// subscript: a `SubscriptNode` whose subscripted object is this node
73+
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
74+
or
75+
// pop: a call whose function is the `pop` attribute of this node
76+
// NOTE: will not track bound method, `f = obj.func; f()`
77+
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("pop") = this.asCfgNode()
78+
}
79+
}
80+
81+
/**
 * Class of dictionary-like objects.
 *
 * This class is abstract: a node is only dictionary-like if some concrete subclass says so.
 */
82+
abstract class DictLike extends Node {
83+
/** Gets a Node that is an access of an element of this dictionary-like object */
84+
Node getElementAccess() {
85+
// subscript: a `SubscriptNode` whose subscripted object is this node
86+
result.asCfgNode().(SubscriptNode).getObject() = this.asCfgNode()
87+
or
88+
// get: a call whose function is the `get` attribute of this node
89+
// NOTE: will not track bound method, `f = obj.func; f()`
90+
result.asCfgNode().(CallNode).getFunction().(AttrNode).getObject("get") = this.asCfgNode()
91+
}
92+
}

python/ql/src/experimental/semmle/python/frameworks/Flask.qll

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ private import python
66
private import experimental.dataflow.DataFlow
77
private import experimental.dataflow.RemoteFlowSources
88
private import experimental.semmle.python.Concepts
9+
private import experimental.semmle.python.frameworks.Werkzeug
910

1011
private module Flask {
1112
/** Gets a reference to the `flask` module. */
@@ -42,42 +43,39 @@ private module Flask {
4243
override string getSourceType() { result = "flask.request" }
4344
}
4445

45-
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
46-
/** Gets a reference to the MultiDict attributes of `flask.request`. */
47-
DataFlow::Node requestMultiDictAttribute(DataFlow::TypeTracker t) {
48-
t.start() and
49-
result.asCfgNode().(AttrNode).getObject(["args", "values", "form"]) =
50-
flask::request().asCfgNode()
51-
or
52-
exists(DataFlow::TypeTracker t2 | result = requestMultiDictAttribute(t2).track(t2, t))
53-
}
54-
55-
/** Gets a reference to the MultiDict attributes of `flask.request`. */
56-
DataFlow::Node requestMultiDictAttribute() {
57-
result = requestMultiDictAttribute(DataFlow::TypeTracker::end())
58-
}
59-
46+
/**
47+
* A source of remote flow from attributes from a flask request.
48+
*
49+
* See https://flask.palletsprojects.com/en/1.1.x/api/#flask.Request
50+
*/
6051
private class RequestInputAccess extends RemoteFlowSource::Range {
52+
string attr_name;
53+
6154
RequestInputAccess() {
6255
// attributes
63-
exists(AttrNode attr, string name |
64-
this.asCfgNode() = attr and attr.getObject(name) = flask::request().asCfgNode()
56+
exists(AttrNode attr |
57+
this.asCfgNode() = attr and attr.getObject(attr_name) = flask::request().asCfgNode()
6558
|
66-
name in ["path",
67-
// string
59+
attr_name in ["path",
60+
// str
6861
"full_path", "base_url", "url", "access_control_request_method", "content_encoding",
6962
"content_md5", "content_type", "data", "method", "mimetype", "origin", "query_string",
7063
"referrer", "remote_addr", "remote_user", "user_agent",
7164
// dict
7265
"environ", "cookies", "mimetype_params", "view_args",
73-
//
74-
"args", "values", "form",
7566
// json
7667
"json",
7768
// List[str]
7869
"access_route",
7970
// file-like
8071
"stream", "input_stream",
72+
// MultiDict[str, str]
73+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
74+
"args", "values", "form",
75+
// MultiDict[str, FileStorage]
76+
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
77+
// TODO: FileStorage needs extra taint steps
78+
"files",
8179
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.HeaderSet
8280
"access_control_request_headers", "pragma",
8381
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Accept
@@ -89,33 +87,37 @@ private module Flask {
8987
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.RequestCacheControl
9088
// TODO: has attributes like `no_cache`, and `to_header` method (actually, many of these models do)
9189
"cache_control",
92-
// TODO: MultiDict[FileStorage]
93-
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
94-
"files",
9590
// https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.Headers
9691
// TODO: dict-like with wsgiref.headers.Header compatibility methods
9792
"headers"]
9893
)
9994
or
10095
// methods
101-
exists(CallNode call, string name | this.asCfgNode() = call |
102-
// NOTE: will not track bound method, `f = func; f()`
103-
name in ["get_data", "get_json"] and
104-
call.getFunction().(AttrNode).getObject(name) = flask::request().asCfgNode()
105-
)
106-
or
107-
// multi dict special handling
108-
(
109-
this = requestMultiDictAttribute()
110-
or
111-
exists(CallNode call | this.asCfgNode() = call |
112-
// NOTE: will not track bound method, `f = func; f()`
113-
call.getFunction().(AttrNode).getObject("getlist") =
114-
requestMultiDictAttribute().asCfgNode()
115-
)
96+
exists(CallNode call | this.asCfgNode() = call |
97+
// NOTE: will not track bound method, `f = obj.func; f()`
98+
attr_name in ["get_data", "get_json"] and
99+
call.getFunction().(AttrNode).getObject(attr_name) = flask::request().asCfgNode()
116100
)
117101
}
118102

119103
override string getSourceType() { result = "flask.request input" }
120104
}
105+
106+
/**
 * A `RequestInputAccess` whose attribute name is `args`, `values`, `form` or `files`,
 * additionally modeled as a `Werkzeug::Datastructures::MultiDict`.
 */
private class RequestInputMultiDict extends RequestInputAccess,
107+
Werkzeug::Datastructures::MultiDict {
108+
RequestInputMultiDict() { attr_name in ["args", "values", "form", "files"] }
109+
}
110+
111+
/** A `RequestInputMultiDict` whose attribute name is `files`. */
private class RequestInputFiles extends RequestInputMultiDict {
112+
RequestInputFiles() { attr_name = "files" }
113+
}
114+
115+
/**
 * An element access on a tracked `MultiDict` whose `getMultiDict()` is a
 * `RequestInputFiles` node, modeled as a `Werkzeug::Datastructures::FileStorage`.
 */
private class RequestInputFileStorage extends Werkzeug::Datastructures::FileStorage {
116+
RequestInputFileStorage() {
117+
exists(RequestInputFiles files, Werkzeug::Datastructures::MultiDictTracked filesTracked |
118+
// the tracked node must originate from the `files` attribute access
119+
filesTracked.getMultiDict() = files and
120+
this = filesTracked.getElementAccess()
121+
)
122+
}
123+
}
121123
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
* Provides classes modeling security-relevant aspects of the `werkzeug` package.
3+
*/
4+
5+
private import python
6+
private import experimental.dataflow.DataFlow
7+
private import experimental.dataflow.TaintTracking
8+
9+
module Werkzeug {
10+
module Datastructures {
11+
// ---------------------------------------------------------------------- //
12+
// MultiDict //
13+
// ---------------------------------------------------------------------- //
14+
/**
15+
* A Node representing an instance of a werkzeug.datastructures.MultiDict
16+
*
 * This class is abstract; extend it to declare which nodes are MultiDict instances.
 *
17+
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.MultiDict
18+
*/
19+
abstract class MultiDict extends DataFlow::Node { }
20+
21+
/**
 * Gets a node that the MultiDict instance `multiDict` has been tracked to,
 * where `t` is the type-tracker state describing that step.
 *
 * The base case is the `multiDict` node itself; the recursive case follows
 * one more type-tracking step from an already tracked node.
 */
private DataFlow::Node multiDictTrack(MultiDict multiDict, DataFlow::TypeTracker t) {
22+
t.start() and
23+
// Bind the start of tracking to `multiDict` itself. Using only
// `result instanceof MultiDict` leaves `multiDict` unrelated to `result`,
// which pairs every tracked node with every MultiDict instance.
result = multiDict
24+
or
25+
exists(DataFlow::TypeTracker t2 | result = multiDictTrack(multiDict, t2).track(t2, t))
26+
}
27+
28+
/**
 * Gets a node obtained by type-tracking for `multiDict`, i.e. a result of the
 * two-argument `multiDictTrack` with an end-state type tracker.
 */
29+
private DataFlow::Node multiDictTrack(MultiDict multiDict) {
30+
result = multiDictTrack(multiDict, DataFlow::TypeTracker::end())
31+
}
32+
33+
/**
 * A node that is a result of `multiDictTrack` for some `MultiDict`,
 * treated as a dictionary-like object.
 */
class MultiDictTracked extends DataFlow::Node, DataFlow::DictLike {
34+
/** The `MultiDict` for which this node is a `multiDictTrack` result. */
MultiDict multiDict;
35+
36+
MultiDictTracked() { this = multiDictTrack(multiDict) }
37+
38+
/** Gets the `MultiDict` for which this node is a `multiDictTrack` result. */
MultiDict getMultiDict() { result = multiDict }
39+
40+
/**
 * Gets an element access of this MultiDict: either one found by
 * `DictLike.getElementAccess`, or an element access on the tracked result
 * of a `getlist` call on this node.
 */
override DataFlow::Node getElementAccess() {
41+
result = DataFlow::DictLike.super.getElementAccess()
42+
or
43+
exists(MultiDictGetListCallResultTracked tracked_call_result |
44+
tracked_call_result.getCall().getMultiDict() = this and
45+
result = tracked_call_result.getElementAccess()
46+
)
47+
}
48+
}
49+
50+
/**
 * Gets a node that the `getlist` attribute of the tracked MultiDict `multiDict`
 * has been tracked to, where `t` is the type-tracker state.
 */
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict, DataFlow::TypeTracker t) {
51+
/*
52+
* Using t.startInAttr("getlist") was not a good solution:
53+
* ```py
54+
* a = request.args
55+
* b = a
56+
* a.getlist("key")
57+
* b.getlist("key")
58+
* ```
59+
* would give `request.args` -> `b.getlist` -- this is correct, but not helpful in a taint-path explanation,
60+
* we REALLY WANT it to be `request.args -> a -> b -> b.getlist`
61+
* This requirement means that we do need the predicate `multiDictTrack`, which could be spared otherwise.
62+
*/
63+
64+
t.start() and
65+
result.asCfgNode().(AttrNode).getObject("getlist") = multiDict.asCfgNode()
66+
or
67+
exists(DataFlow::TypeTracker t2 | result = multiDictGetListTrack(multiDict, t2).track(t2, t))
68+
}
69+
70+
/**
 * Gets a node obtained by type-tracking the `getlist` attribute of `multiDict`,
 * i.e. a result of the two-argument `multiDictGetListTrack` with an end-state type tracker.
 */
private DataFlow::Node multiDictGetListTrack(MultiDictTracked multiDict) {
71+
result = multiDictGetListTrack(multiDict, DataFlow::TypeTracker::end())
72+
}
73+
74+
/**
 * A call whose function is a tracked reference to the `getlist` attribute
 * of a tracked MultiDict.
 */
private class MultiDictGetListCall extends DataFlow::Node {
75+
/** The tracked MultiDict whose `getlist` attribute is being called. */
MultiDictTracked multiDict;
76+
77+
MultiDictGetListCall() {
78+
this.asCfgNode().(CallNode).getFunction() = multiDictGetListTrack(multiDict).asCfgNode()
79+
}
80+
81+
/** Gets the tracked MultiDict whose `getlist` attribute is being called. */
MultiDictTracked getMultiDict() { result = multiDict }
82+
}
83+
84+
/**
 * Gets a node that the result of the `getlist` call `call` has been tracked to,
 * where `t` is the type-tracker state. Tracking starts at `call` itself.
 */
private DataFlow::Node multiDictGetListCallTrack(
85+
MultiDictGetListCall call, DataFlow::TypeTracker t
86+
) {
87+
t.start() and
88+
result = call
89+
or
90+
exists(DataFlow::TypeTracker t2 | result = multiDictGetListCallTrack(call, t2).track(t2, t))
91+
}
92+
93+
/**
 * Gets a node obtained by type-tracking the result of the `getlist` call `call`,
 * i.e. a result of the two-argument `multiDictGetListCallTrack` with an end-state type tracker.
 */
94+
private DataFlow::Node multiDictGetListCallTrack(MultiDictGetListCall call) {
95+
result = multiDictGetListCallTrack(call, DataFlow::TypeTracker::end())
96+
}
97+
98+
/**
 * A node that the result of a `MultiDictGetListCall` has been tracked to,
 * treated as a list-like object.
 */
private class MultiDictGetListCallResultTracked extends DataFlow::Node, DataFlow::ListLike {
99+
/** The `getlist` call whose result was tracked to this node. */
MultiDictGetListCall call;
100+
101+
MultiDictGetListCallResultTracked() { this = multiDictGetListCallTrack(call) }
102+
103+
/** Gets the `getlist` call whose result was tracked to this node. */
MultiDictGetListCall getCall() { result = call }
104+
}
105+
106+
/**
 * An additional taint step from a tracked MultiDict to a `getlist` call made on it.
 */
private class MultiDictAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
107+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
108+
// `nodeTo` is the `getlist` call, `nodeFrom` is the tracked MultiDict it is called on
nodeTo.(MultiDictGetListCall).getMultiDict() = nodeFrom.(MultiDictTracked)
109+
}
110+
}
111+
112+
// ---------------------------------------------------------------------- //
113+
// FileStorage //
114+
// ---------------------------------------------------------------------- //
115+
/**
116+
* A Node representing an instance of a werkzeug.datastructures.FileStorage
117+
*
 * This class is abstract; extend it to declare which nodes are FileStorage instances.
 *
118+
* See https://werkzeug.palletsprojects.com/en/1.0.x/datastructures/#werkzeug.datastructures.FileStorage
119+
*/
120+
abstract class FileStorage extends DataFlow::Node { }
121+
122+
/**
 * An additional taint step from a `FileStorage` node to a read of one of the
 * attributes listed in `step`.
 */
private class FileStorageAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
123+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
124+
nodeFrom instanceof FileStorage and
125+
exists(string name |
126+
name in ["filename",
127+
// str
128+
"name", "content_type", "mimetype",
129+
// file-like
130+
"stream",
131+
// TODO: werkzeug.datastructures.Headers
132+
"headers",
133+
// dict[str, str]
134+
"mimetype_params"] and
135+
// `nodeTo` is the attribute access `nodeFrom.<name>`
nodeTo.asCfgNode().(AttrNode).getObject(name) = nodeFrom.asCfgNode()
136+
)
137+
}
138+
}
139+
}
140+
}

python/ql/test/experimental/library-tests/frameworks/flask/TestTaint.expected

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@
3535
| test.py:65 | ok | test_taint | request.data |
3636
| test.py:68 | ok | test_taint | request.files |
3737
| test.py:69 | ok | test_taint | request.files['key'] |
38-
| test.py:70 | fail | test_taint | request.files['key'].filename |
39-
| test.py:71 | fail | test_taint | request.files['key'].stream |
40-
| test.py:72 | fail | test_taint | request.files.getlist(..) |
41-
| test.py:73 | fail | test_taint | request.files.getlist(..)[0].filename |
42-
| test.py:74 | fail | test_taint | request.files.getlist(..)[0].stream |
38+
| test.py:70 | ok | test_taint | request.files['key'].filename |
39+
| test.py:71 | ok | test_taint | request.files['key'].stream |
40+
| test.py:72 | ok | test_taint | request.files.getlist(..) |
41+
| test.py:73 | ok | test_taint | request.files.getlist(..)[0].filename |
42+
| test.py:74 | ok | test_taint | request.files.getlist(..)[0].stream |
4343
| test.py:77 | ok | test_taint | request.form |
4444
| test.py:78 | ok | test_taint | request.form['key'] |
4545
| test.py:79 | ok | test_taint | request.form.getlist(..) |
@@ -93,4 +93,4 @@
9393
| test.py:177 | ok | test_taint | request.args.getlist(..) |
9494
| test.py:178 | ok | test_taint | a.getlist(..) |
9595
| test.py:179 | ok | test_taint | b.getlist(..) |
96-
| test.py:180 | fail | test_taint | gl(..) |
96+
| test.py:180 | ok | test_taint | gl(..) |

0 commit comments

Comments
 (0)