Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 68fd75c

Browse files
committed
UnpackUnsafe query and tests
1 parent 5b31da4 commit 68fd75c

7 files changed

Lines changed: 164 additions & 0 deletions

File tree

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tarball without validating that the destination file path
8+
is within the destination directory using <code>shutil.unpack_archive()</code> can cause files outside the
9+
destination directory to be overwritten, due to the possible presence of directory traversal elements
10+
(<code>..</code>) in archive path names.</p>
11+
12+
<p>Tarballs contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tarball contains a file entry <code>../sim4n6.txt</code>, and the tarball
21+
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
22+
in an output file path of <code>/tmp/tmp123/../sim4n6.txt</code>, which would cause the file to be
23+
written to <code>/tmp/</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tarball entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>Consider using a safer module, such as <code>zipfile</code>.</p>
32+
33+
</recommendation>
34+
35+
<example>
36+
<p>
37+
In this example an archive is extracted without validating file paths.
38+
</p>
39+
40+
<sample src="examples/HIT_UnsafeUnpack.py" />
41+
42+
<p>To fix this vulnerability, we need to call the method <code>TarFile.extract()</code>
43+
on each <code>member</code> after verifying that its name neither contains <code>..</code> nor starts with <code>/</code>.
44+
</p>
45+
46+
<sample src="examples/NoHIT_UnsafeUnpack.py" />
47+
48+
</example>
49+
<references>
50+
51+
<li>
52+
Shutil official documentation
53+
<a href="https://docs.python.org/3/library/shutil.html?highlight=unpack_archive#shutil.unpack_archive">shutil.unpack_archive() warning.</a>
54+
</li>
55+
</references>
56+
</qhelp>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/**
2+
* @name Arbitrary file write during a remotely downloaded tarball extraction
3+
* @description Extracting files from a malicious tarball using `shutil.unpack_archive()` without validating
4+
* that the destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten. This query specifically flags tarballs that come from a remote location.
6+
* @kind path-problem
7+
* @id py/unsafe-unpacking
8+
* @problem.severity error
9+
* @security-severity 7.5
10+
* @precision high
11+
* @tags security
12+
* external/cwe/cwe-022bis
13+
*/
14+
15+
import python
16+
import experimental.semmle.python.Concepts
17+
import DataFlow::PathGraph
18+
import semmle.python.ApiGraphs
19+
import semmle.python.dataflow.new.internal.Attributes
20+
import semmle.python.dataflow.new.DataFlow
21+
import semmle.python.ApiGraphs
22+
import semmle.python.dataflow.new.TaintTracking
23+
import semmle.python.Concepts
24+
25+
class UnsafeUnpackingConfig extends TaintTracking::Configuration {
26+
UnsafeUnpackingConfig() { this = "UnsafeUnpackingConfig" }
27+
28+
override predicate isSource(DataFlow::Node source) {
29+
// A source coming from a remote location
30+
exists(Http::Client::Request request | source = request) and
31+
not source.getScope().getLocation().getFile().inStdlib()
32+
}
33+
34+
override predicate isSink(DataFlow::Node sink) {
35+
// A sink capturing method calls to `unpack_archive`.
36+
sink =
37+
API::moduleImport("shutil").getMember("unpack_archive").getACall().getParameter(0).asSink() and
38+
not sink.getScope().getLocation().getFile().inStdlib()
39+
}
40+
41+
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
42+
// Writing the response data to the archive
43+
exists(API::CallNode call, MethodCallNode w |
44+
nodeTo = call.getArg(0) and
45+
call = API::builtin("open").getACall() and
46+
w.getMethodName() = "write" and
47+
w.getObject() = call.getReturn().getAValueReachableFromSource() and
48+
nodeFrom = w.getArg(0)
49+
)
50+
}
51+
}
52+
53+
from UnsafeUnpackingConfig config, DataFlow::PathNode source, DataFlow::PathNode sink
54+
where config.hasFlowPath(source, sink)
55+
select source.getNode(), source, sink, "Unsafe extraction from a malicious tarball, is used in a $@",
56+
source.getAQlClass(), "during archive unpacking."
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import requests
2+
import shutil
3+
4+
url = "https://www.someremote.location/tarball.tar.gz"
5+
response = requests.get(url, stream=True)
6+
7+
tarpath = "/tmp/tmp456/tarball.tar.gz"
8+
with open(tarpath, "wb") as f:
9+
f.write(response.raw.read())
10+
11+
untarredpath = "/tmp/tmp123"
12+
shutil.unpack_archive(tarpath, untarredpath)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import requests
import tarfile

# Download the (untrusted) archive from a remote location.
url = "https://www.someremote.location/tarball.tar.gz"
response = requests.get(url, stream=True)

# Persist the raw response body to disk so that tarfile can open it.
tarpath = "/tmp/tmp456/tarball.tar.gz"
with open(tarpath, "wb") as f:
    f.write(response.raw.read())

untarredpath = "/tmp/tmp123"
with tarfile.open(tarpath) as tar:
    for member in tar.getmembers():
        # Reject absolute paths and any name containing "..": either one
        # could place the extracted file outside of untarredpath.
        if member.name.startswith("/") or ".." in member.name:
            raise Exception("Path traversal identified in tarball")

        # TarFile.extract() takes the member first and the destination
        # directory second.
        tar.extract(member, untarredpath)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
edges
2+
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute |
3+
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath |
4+
nodes
5+
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
6+
| UnsafeUnpack.py:9:15:9:26 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
7+
| UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | semmle.label | ControlFlowNode for tarpath |
8+
subpaths
9+
#select
10+
| UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:5:12:5:41 | ControlFlowNode for Attribute() | UnsafeUnpack.py:12:23:12:29 | ControlFlowNode for tarpath | Unsafe extraction from a malicious tarball, is used in a $@ | PathNode | during archive unpacking. |
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import requests
2+
import shutil
3+
4+
url = "https://www.someremote.location/tarball.tar.gz"
5+
response = requests.get(url, stream=True)
6+
7+
tarpath = "/tmp/tmp456/tarball.tar.gz"
8+
with open(tarpath, "wb") as f:
9+
f.write(response.raw.read())
10+
11+
untarredpath = "/tmp/tmp123"
12+
shutil.unpack_archive(tarpath, untarredpath)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/Security/CWE-022bis/UnsafeUnpack.ql

0 commit comments

Comments
 (0)