33 * @description Extracting files from a malicious tar archive without validating that the
44 * destination file path is within the destination directory can cause files outside
55 * the destination directory to be overwritten.
6- * @kind path-problem
6+ * @kind path-problem
77 * @id py/tarslip
88 * @problem.severity error
99 * @precision medium
1313
1414import python
1515import semmle.python.security.Paths
16-
1716import semmle.python.security.TaintTracking
1817import semmle.python.security.strings.Basic
1918
/**
 * A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)`.
 *
 * Taint of this kind propagates to archive entries (`TarFileInfo`) through
 * `getmember`, `getmembers`, and iteration over the open tarfile.
 */
class OpenTarFile extends TaintKind {
  OpenTarFile() { this = "tarfile.open" }

  /**
   * Gets the taint kind of the result of calling method `name` on an open tarfile:
   * `getmember` gives a single entry, `getmembers` gives a sequence of entries.
   */
  override TaintKind getTaintOfMethodResult(string name) {
    name = "getmember" and result instanceof TarFileInfo
    or
    name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
  }

  /** Gets the class value named `tarfile.TarFile`, the type associated with this kind. */
  override ClassValue getType() { result = Value::named("tarfile.TarFile") }

  /** Gets the taint kind of each item produced by iterating over an open tarfile. */
  override TaintKind getTaintForIteration() { result instanceof TarFileInfo }
}
4133
/**
 * The source of open tarfile objects. That is, any call to `tarfile.open(...)`.
 *
 * Calls with an argument that points to a string value, and calls located in
 * a file named `tarfile.py`, are excluded.
 */
class TarfileOpen extends TaintSource {
  TarfileOpen() {
    // The qualified name must be exactly "tarfile.open" (no embedded whitespace),
    // otherwise the lookup matches nothing and the query has no sources.
    Value::named("tarfile.open").getACall() = this and
    /*
     * If argument refers to a string object, then it's a hardcoded path and
     * this tarfile is safe.
     */

    not this.(CallNode).getAnArg().pointsTo(any(StringValue str)) and
    /* Ignore opens within the tarfile module itself */
    not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
  }

  /** Holds if `kind` is the open-tarfile taint kind produced by this source. */
  override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
}
6250
/**
 * A TaintKind, identified by the string "tarfile.entry", used in this query
 * for values derived from an open tarfile.
 */
class TarFileInfo extends TaintKind {
  TarFileInfo() { this = "tarfile.entry" }

  /** Calling `next` on a value of this kind yields the same kind. */
  override TaintKind getTaintOfMethodResult(string name) { name = "next" and result = this }

  /** Reading the `name` attribute of a value of this kind yields a `TarFileInfo` kind. */
  override TaintKind getTaintOfAttribute(string name) {
    name = "name" and result instanceof TarFileInfo
  }
}
7760
/*
 * For efficiency we don't want to track the flow of taint
 * around the tarfile module.
 */

8166class ExcludeTarFilePy extends Sanitizer {
82-
83- ExcludeTarFilePy ( ) {
84- this = "Tar sanitizer"
85- }
67+ ExcludeTarFilePy ( ) { this = "Tar sanitizer" }
8668
8769 override predicate sanitizingNode ( TaintKind taint , ControlFlowNode node ) {
8870 node .getLocation ( ) .getFile ( ) .getBaseName ( ) = "tarfile.py" and
@@ -94,45 +76,34 @@ class ExcludeTarFilePy extends Sanitizer {
9476 taint .( SequenceKind ) .getItem ( ) instanceof TarFileInfo
9577 )
9678 }
97-
9879}
9980
/**
 * Any call to an extractall method.
 *
 * The sink is the object on which `extractall` is looked up, and only calls
 * for which `getAnArg()` yields no argument are considered.
 */
class ExtractAllSink extends TaintSink {
  CallNode call;

  ExtractAllSink() {
    this = call.getFunction().(AttrNode).getObject("extractall") and
    count(call.getAnArg()) = 0
  }

  /** Holds if `kind` is the open-tarfile taint kind. */
  override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
}
11592
/**
 * Argument to extract method.
 *
 * The sink is argument 0 of any call whose function is an attribute access
 * named `extract`.
 */
class ExtractSink extends TaintSink {
  CallNode call;

  ExtractSink() {
    call.getFunction().(AttrNode).getName() = "extract" and
    this = call.getArg(0)
  }

  /** Holds if `kind` is the tarfile-entry taint kind. */
  override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
}
131104
132-
133105/* Members argument to extract method */
134106class ExtractMembersSink extends TaintSink {
135-
136107 CallNode call ;
137108
138109 ExtractMembersSink ( ) {
@@ -145,21 +116,15 @@ class ExtractMembersSink extends TaintSink {
145116 or
146117 kind instanceof OpenTarFile
147118 }
148-
149119}
150120
/**
 * A sanitizer for `TarFileInfo` taint on edges guarded by a test that
 * `path_sanitizing_test` accepts.
 */
class TarFileInfoSanitizer extends Sanitizer {
  TarFileInfoSanitizer() { this = "TarInfo sanitizer" }

  /** Holds if `test` refines on a path-sanitizing test and `taint` is a tarfile entry. */
  override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
    path_sanitizing_test(test.getTest()) and
    taint instanceof TarFileInfo
  }
}
164129
165130private predicate path_sanitizing_test ( ControlFlowNode test ) {
@@ -170,7 +135,6 @@ private predicate path_sanitizing_test(ControlFlowNode test) {
170135}
171136
172137class TarSlipConfiguration extends TaintTracking:: Configuration {
173-
174138 TarSlipConfiguration ( ) { this = "TarSlip configuration" }
175139
176140 override predicate isSource ( TaintTracking:: Source source ) { source instanceof TarfileOpen }
@@ -193,16 +157,15 @@ class TarSlipConfiguration extends TaintTracking::Configuration {
193157 node .asVariable ( ) .getDefinition ( ) = def
194158 or
195159 node .asCfgNode ( ) = def .getDefiningNode ( )
196- |
160+ |
197161 def .getScope ( ) = Value:: named ( "tarfile.open" ) .( CallableValue ) .getScope ( )
198162 or
199163 def .isSelf ( ) and def .getScope ( ) .getEnclosingModule ( ) .getName ( ) = "tarfile"
200164 )
201165 }
202166}
203167
204-
// Report every flow path found by the TarSlip configuration, from a source to a sink.
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
  "a potentially untrusted source"
0 commit comments