Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 207c76b

Browse files
committed
C++: Path explanations in DefaultTaintTracking
The first three queries are migrated to use path explanations.
1 parent b07380d commit 207c76b

11 files changed

Lines changed: 704 additions & 75 deletions

File tree

cpp/ql/src/Security/CWE/CWE-022/TaintedPath.ql

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @name Uncontrolled data used in path expression
33
* @description Accessing paths influenced by users can allow an
44
* attacker to access unexpected resources.
5-
* @kind problem
5+
* @kind path-problem
66
* @problem.severity warning
77
* @precision medium
88
* @id cpp/path-injection
@@ -17,6 +17,7 @@ import cpp
1717
import semmle.code.cpp.security.FunctionWithWrappers
1818
import semmle.code.cpp.security.Security
1919
import semmle.code.cpp.security.TaintTracking
20+
import TaintedWithPath
2021

2122
/**
2223
* A function for opening a file.
@@ -51,12 +52,19 @@ class FileFunction extends FunctionWithWrappers {
5152
override predicate interestingArg(int arg) { arg = 0 }
5253
}
5354

55+
class TaintedPathConfiguration extends TaintTrackingConfiguration {
56+
override predicate isSink(Element tainted) {
57+
exists(FileFunction fileFunction | fileFunction.outermostWrapperFunctionCall(tainted, _))
58+
}
59+
}
60+
5461
from
55-
FileFunction fileFunction, Expr taintedArg, Expr taintSource, string taintCause, string callChain
62+
FileFunction fileFunction, Expr taintedArg, Expr taintSource, PathNode sourceNode,
63+
PathNode sinkNode, string taintCause, string callChain
5664
where
5765
fileFunction.outermostWrapperFunctionCall(taintedArg, callChain) and
58-
tainted(taintSource, taintedArg) and
66+
taintedWithPath(taintSource, taintedArg, sourceNode, sinkNode) and
5967
isUserInput(taintSource, taintCause)
60-
select taintedArg,
68+
select taintedArg, sourceNode, sinkNode,
6169
"This argument to a file access function is derived from $@ and then passed to " + callChain,
6270
taintSource, "user input (" + taintCause + ")"

cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* @description Using externally-controlled format strings in
44
* printf-style functions can lead to buffer overflows
55
* or data representation problems.
6-
* @kind problem
6+
* @kind path-problem
77
* @problem.severity warning
88
* @precision medium
99
* @id cpp/tainted-format-string
@@ -16,12 +16,20 @@ import cpp
1616
import semmle.code.cpp.security.Security
1717
import semmle.code.cpp.security.FunctionWithWrappers
1818
import semmle.code.cpp.security.TaintTracking
19+
import TaintedWithPath
1920

20-
from PrintfLikeFunction printf, Expr arg, string printfFunction, Expr userValue, string cause
21+
class TaintedPathConfiguration extends TaintTrackingConfiguration {
22+
override predicate isSink(Element tainted) {
23+
exists(PrintfLikeFunction printf | printf.outermostWrapperFunctionCall(tainted, _))
24+
}
25+
}
26+
27+
from PrintfLikeFunction printf, Expr arg, PathNode sourceNode,
28+
PathNode sinkNode, string printfFunction, Expr userValue, string cause
2129
where
2230
printf.outermostWrapperFunctionCall(arg, printfFunction) and
23-
tainted(userValue, arg) and
31+
taintedWithPath(userValue, arg, sourceNode, sinkNode) and
2432
isUserInput(userValue, cause)
25-
select arg,
33+
select arg, sourceNode, sinkNode,
2634
"The value of this argument may come from $@ and is being used as a formatting argument to " +
2735
printfFunction, userValue, cause

cpp/ql/src/Security/CWE/CWE-190/TaintedAllocationSize.ql

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* @name Overflow in uncontrolled allocation size
33
* @description Allocating memory with a size controlled by an external
44
* user can result in integer overflow.
5-
* @kind problem
5+
* @kind path-problem
66
* @problem.severity error
77
* @precision high
88
* @id cpp/uncontrolled-allocation-size
@@ -13,21 +13,33 @@
1313

1414
import cpp
1515
import semmle.code.cpp.security.TaintTracking
16+
import TaintedWithPath
1617

17-
predicate taintedAllocSize(Expr e, Expr source, string taintCause) {
18+
predicate taintedChild(Expr e, Expr tainted) {
1819
(
19-
isAllocationExpr(e) or
20+
isAllocationExpr(e)
21+
or
2022
any(MulExpr me | me.getAChild() instanceof SizeofOperator) = e
2123
) and
24+
tainted = e.getAChild() and
25+
tainted.getUnspecifiedType() instanceof IntegralType
26+
}
27+
28+
class TaintedAllocationSizeConfiguration extends TaintTrackingConfiguration {
29+
override predicate isSink(Element tainted) { taintedChild(_, tainted) }
30+
}
31+
32+
predicate taintedAllocSize(
33+
Expr e, Expr source, PathNode sourceNode, PathNode sinkNode, string taintCause
34+
) {
35+
isUserInput(source, taintCause) and
2236
exists(Expr tainted |
23-
tainted = e.getAChild() and
24-
tainted.getUnspecifiedType() instanceof IntegralType and
25-
isUserInput(source, taintCause) and
26-
tainted(source, tainted)
37+
taintedChild(e, tainted) and
38+
taintedWithPath(source, tainted, sourceNode, sinkNode)
2739
)
2840
}
2941

30-
from Expr e, Expr source, string taintCause
31-
where taintedAllocSize(e, source, taintCause)
32-
select e, "This allocation size is derived from $@ and might overflow", source,
33-
"user input (" + taintCause + ")"
42+
from Expr e, Expr source, PathNode sourceNode, PathNode sinkNode, string taintCause
43+
where taintedAllocSize(e, source, sourceNode, sinkNode, taintCause)
44+
select e, sourceNode, sinkNode, "This allocation size is derived from $@ and might overflow",
45+
source, "user input (" + taintCause + ")"

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import cpp
22
import semmle.code.cpp.security.Security
33
private import semmle.code.cpp.ir.dataflow.DataFlow
44
private import semmle.code.cpp.ir.dataflow.DataFlow2
5+
private import semmle.code.cpp.ir.dataflow.DataFlow3
56
private import semmle.code.cpp.ir.IR
67
private import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch as Dispatch
78
private import semmle.code.cpp.models.interfaces.Taint
@@ -171,6 +172,7 @@ private predicate nodeIsBarrierIn(DataFlow::Node node) {
171172
node = getNodeForSource(any(Expr e))
172173
}
173174

175+
cached
174176
private predicate instructionTaintStep(Instruction i1, Instruction i2) {
175177
// Expressions computed from tainted data are also tainted
176178
exists(CallInstruction call, int argIndex | call = i2 |
@@ -381,3 +383,135 @@ Function resolveCall(Call call) {
381383
result = Dispatch::viableCallable(callInstruction)
382384
)
383385
}
386+
387+
/**
388+
* Provides definitions for augmenting source/sink pairs with data-flow paths
389+
* between them. From a `@kind path-problem` query, import this module in the
390+
* global scope, extend `TaintTrackingConfiguration`, and use `taintedWithPath`
391+
* in place of `tainted`.
392+
*
393+
* Importing this module will also import the query predicates that contain the
394+
* taint paths.
395+
*/
396+
module TaintedWithPath {
397+
/**
398+
* A taint-tracking configuration that matches sources and sinks in the same
399+
* way as the `tainted` predicate.
400+
*/
401+
class TaintTrackingConfiguration extends int {
402+
TaintTrackingConfiguration() { this = 1 }
403+
404+
/** Override this to specify which elements are sinks in this configuration. */
405+
abstract predicate isSink(Element e);
406+
}
407+
408+
private class AdjustedConfiguration extends DataFlow3::Configuration {
409+
AdjustedConfiguration() { this = "AdjustedConfiguration" }
410+
411+
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
412+
413+
override predicate isSink(DataFlow::Node sink) {
414+
exists(TaintTrackingConfiguration cfg | cfg.isSink(adjustedSink(sink)))
415+
}
416+
417+
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
418+
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
419+
}
420+
421+
override predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
422+
423+
override predicate isBarrierIn(DataFlow::Node node) { nodeIsBarrierIn(node) }
424+
}
425+
426+
/*
427+
* A sink `Element` may map to multiple `DataFlowX::PathNode`s via (the
428+
* inverse of) `adjustedSink`. For example, an `Expr` maps to all its
429+
* conversions, and a `Variable` maps to all loads and stores from it. Because
430+
* the path node is part of the tuple that constitutes the alert, this leads
431+
* to duplicate alerts.
432+
*
433+
* To avoid showing duplicates, we edit the graph to replace the final node
434+
* coming from the data-flow library with a node that matches exactly the
435+
* `Element` sink that's requested.
436+
*
437+
* The same should ideally be done with the source, but we haven't seen a
438+
* need for it yet.
439+
*/
440+
441+
private newtype TPathNode =
442+
TWrapPathNode(DataFlow3::PathNode n) or
443+
TFinalPathNode(Element e) { exists(TaintTrackingConfiguration cfg | cfg.isSink(e)) }
444+
445+
/** An opaque type used for the nodes of a data-flow path. */
446+
class PathNode extends TPathNode {
447+
/** Gets a textual representation of this element. */
448+
string toString() { none() }
449+
450+
/**
451+
* Holds if this element is at the specified location.
452+
* The location spans column `startcolumn` of line `startline` to
453+
* column `endcolumn` of line `endline` in file `filepath`.
454+
* For more information, see
455+
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
456+
*/
457+
predicate hasLocationInfo(
458+
string filepath, int startline, int startcolumn, int endline, int endcolumn
459+
) {
460+
none()
461+
}
462+
}
463+
464+
private class WrapPathNode extends PathNode, TPathNode {
465+
DataFlow3::PathNode inner() { this = TWrapPathNode(result) }
466+
467+
override string toString() { result = this.inner().toString() }
468+
469+
override predicate hasLocationInfo(
470+
string filepath, int startline, int startcolumn, int endline, int endcolumn
471+
) {
472+
this.inner().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
473+
}
474+
}
475+
476+
private class FinalPathNode extends PathNode, TFinalPathNode {
477+
Element inner() { this = TFinalPathNode(result) }
478+
479+
override string toString() { result = this.inner().toString() }
480+
481+
override predicate hasLocationInfo(
482+
string filepath, int startline, int startcolumn, int endline, int endcolumn
483+
) {
484+
this
485+
.inner()
486+
.getLocation()
487+
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
488+
}
489+
}
490+
491+
/** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */
492+
query predicate edges(PathNode a, PathNode b) {
493+
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), b.(WrapPathNode).inner())
494+
or
495+
// To avoid showing trivial-looking steps, we replace the last node instead
496+
// of adding an edge out of it.
497+
exists(WrapPathNode replaced |
498+
DataFlow3::PathGraph::edges(a.(WrapPathNode).inner(), replaced.inner()) and
499+
b.(FinalPathNode).inner() = adjustedSink(replaced.inner().getNode())
500+
)
501+
}
502+
503+
/** Holds if `n` is a node in the graph of data flow path explanations. */
504+
query predicate nodes(PathNode n, string key, string val) {
505+
key = "semmle.label" and val = n.toString()
506+
}
507+
508+
predicate taintedWithPath(Expr source, Element tainted, PathNode sourceNode, PathNode sinkNode) {
509+
exists(AdjustedConfiguration cfg, DataFlow3::PathNode sinkInner, DataFlow::Node sink |
510+
sourceNode.(WrapPathNode).inner().getNode() = getNodeForSource(source) and
511+
sinkInner.getNode() = sink and
512+
cfg.hasFlowPath(sourceNode.(WrapPathNode).inner(), sinkInner) and
513+
tainted = adjustedSink(sinkInner.getNode()) and
514+
tainted = sinkNode.(FinalPathNode).inner()
515+
)
516+
}
517+
}

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,7 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
323323
simpleInstructionLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asInstruction())
324324
}
325325

326+
cached
326327
private predicate simpleInstructionLocalFlowStep(Instruction iFrom, Instruction iTo) {
327328
iTo.(CopyInstruction).getSourceValue() = iFrom
328329
or
Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,11 @@
1-
| test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename) | test.c:9:23:9:26 | argv | user input (argv) |
1+
edges
2+
| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | (const char *)... |
3+
| test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName |
4+
nodes
5+
| test.c:9:23:9:26 | argv | semmle.label | argv |
6+
| test.c:17:11:17:18 | (const char *)... | semmle.label | (const char *)... |
7+
| test.c:17:11:17:18 | (const char *)... | semmle.label | (const char *)... |
8+
| test.c:17:11:17:18 | fileName | semmle.label | fileName |
9+
| test.c:27:11:27:18 | fileName | semmle.label | fileName |
10+
#select
11+
| test.c:17:11:17:18 | fileName | test.c:9:23:9:26 | argv | test.c:17:11:17:18 | fileName | This argument to a file access function is derived from $@ and then passed to fopen(filename) | test.c:9:23:9:26 | argv | user input (argv) |

0 commit comments

Comments
 (0)