@@ -10,6 +10,35 @@ import semmle.python.dataflow.new.TaintTracking
1010import semmle.python.frameworks.Stdlib
1111import semmle.python.dataflow.new.RemoteFlowSources
1212
/**
 * Gets an API node for one of the callables that produce a tar archive object:
 *  - the `tarfile.open` function,
 *  - the `tarfile.TarFile` class itself,
 *  - the `open` member of any subclass of `tarfile.TarFile`
 *    (for example `MyTarFile.open` where `MyTarFile` derives from `tarfile.TarFile`).
 */
API::Node tarfileOpen() {
  exists(API::Node tarfileModule | tarfileModule = API::moduleImport("tarfile") |
    // `tarfile.open` and `tarfile.TarFile`
    result = tarfileModule.getMember(["open", "TarFile"])
    or
    // `<subclass of tarfile.TarFile>.open`
    result = tarfileModule.getMember("TarFile").getASubclass().getMember("open")
  )
}
25+
/**
 * A call that yields an opened tar archive: either a direct call to one of the
 * callables returned by `tarfileOpen()`, or a call to `contextlib.closing` whose
 * first positional argument is such a call.
 */
class AllTarfileOpens extends API::CallNode {
  AllTarfileOpens() {
    // Direct open, e.g. `tarfile.open(...)`.
    this = tarfileOpen().getACall()
    or
    // Wrapped open, e.g. `closing(tarfile.open(...))`.
    this = API::moduleImport("contextlib").getMember("closing").getACall() and
    this.getArg(0) = tarfileOpen().getACall()
  }
}
41+
1342class UnsafeUnpackingConfig extends TaintTracking:: Configuration {
1443 UnsafeUnpackingConfig ( ) { this = "UnsafeUnpackingConfig" }
1544
@@ -68,8 +97,47 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
6897 }
6998
override predicate isSink(DataFlow::Node sink) {
  (
    // The first positional argument of `shutil.unpack_archive(...)`.
    sink = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
    or
    // Extraction methods invoked on the result of any recognised tarfile open.
    exists(API::Node archive | archive = any(AllTarfileOpens opened).getReturn() |
      // `file.extractall(...)`
      exists(MethodCallNode extractAll |
        extractAll = archive.getMember("extractall").getACall()
      |
        // No `members` keyword argument: the receiver `file` is the sink.
        not exists(extractAll.getArgByName("members")) and
        sink = extractAll.getObject()
        or
        // A `members` keyword argument is present:
        //  - if it is a name constant (such as `None`) or a list literal,
        //    the receiver `file` is the sink;
        //  - else if it is a call to a method named `getmembers`,
        //    the receiver of that `getmembers` call is the sink;
        //  - otherwise the `members` argument itself is the sink.
        exists(Node members | members = extractAll.getArgByName("members") |
          if
            members.asCfgNode() instanceof NameConstantNode or
            members.asCfgNode() instanceof ListNode
          then sink = extractAll.getObject()
          else
            if members.(MethodCallNode).getMethodName() = "getmembers"
            then sink = members.(MethodCallNode).getObject()
            else sink = extractAll.getArgByName("members")
        )
      )
      or
      // `file.extract(member, ...)`: the first positional argument is the sink.
      sink = archive.getMember("extract").getACall().getArg(0)
      or
      // `file._extract_member(x, obj.name)`: when the second positional argument
      // is a read of the attribute `name`, the object being read from is the sink.
      exists(MethodCallNode extractMember |
        extractMember = archive.getMember("_extract_member").getACall()
      |
        extractMember.getArg(1).(AttrRead).accesses(sink, "name")
      )
    )
  ) and
  // Ignore sinks located inside the Python standard library itself.
  not sink.getScope().getLocation().getFile().inStdlib()
}
74142
75143 override predicate isAdditionalTaintStep ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
@@ -119,5 +187,19 @@ class UnsafeUnpackingConfig extends TaintTracking::Configuration {
119187 // Join the base_dir to the filename
120188 nodeTo = API:: moduleImport ( "os" ) .getMember ( "path" ) .getMember ( "join" ) .getACall ( ) and
121189 nodeFrom = nodeTo .( API:: CallNode ) .getArg ( 1 )
190+ or
191+ // Go through an Open for a Tarfile
192+ nodeTo = tarfileOpen ( ) .getACall ( ) and nodeFrom = nodeTo .( MethodCallNode ) .getArg ( 0 )
193+ or
194+ // Handle the case where the getmembers is used.
195+ nodeTo .( MethodCallNode ) .calls ( nodeFrom , "getmembers" ) and
196+ nodeFrom instanceof AllTarfileOpens
197+ or
198+ // To handle the case of `with closing(tarfile.open()) as file:`
199+ // we add a step from the first argument of `closing` to the call to `closing`,
200+ // whenever that first argument is a return of `tarfile.open()`.
201+ nodeTo = API:: moduleImport ( "contextlib" ) .getMember ( "closing" ) .getACall ( ) and
202+ nodeFrom = nodeTo .( API:: CallNode ) .getArg ( 0 ) and
203+ nodeFrom = tarfileOpen ( ) .getReturn ( ) .getAValueReachableFromSource ( )
122204 }
123205}
0 commit comments