1+ /**
2+ * INTERNAL. DO NOT USE.
3+ *
4+ * Provides predicates for resolving imports.
5+ */
6+
17private import python
28private import semmle.python.dataflow.new.DataFlow
39private import semmle.python.dataflow.new.internal.ImportStar
410private import semmle.python.dataflow.new.TypeTracker
511
12+ /**
13+ * Python modules and the way imports are resolved are... complicated. Here's a crash course in how
14+ * it works, as well as some caveats to bear in mind when looking at the implementation in this
15+ * module.
16+ *
17+ * First, let's consider the humble `import` statement:
18+ * ```python
19+ * import foo
20+ * import bar.baz
21+ * import ham.eggs as spam
22+ * ```
23+ *
24+ * In the AST, all imports are aliased, as in the last import above. That is, `import foo` becomes
25+ * `import foo as foo`, and `import bar.baz` becomes `import bar as bar`. Note that `import` is
26+ * exclusively used to import modules -- if `eggs` is an attribute of the `ham` module (and not a
27+ * submodule of the `ham` package), then the third line above is an error.
28+ *
29+ * Next, we have the `from` statement. This one is a bit more complicated, but still has the same
30+ * aliasing desugaring as above applied to it. Thus, `from foo import bar` becomes
31+ * `from foo import bar as bar`.
32+ *
33+ * In general, `from foo import bar` can mean two different things:
34+ *
35+ * 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
36+ * the attribute `bar` into the current module (binding it to the name `bar`).
37+ * 2. If `foo` is a package, and `bar` is a submodule of `foo`, then `from foo import bar` first imports
38+ * `foo.bar`, and then attempts to locate the `bar` attribute again. In most cases, that attribute
39+ * will then point to the `bar` submodule.
40+ *
41+ * Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
42+ * First of all, both of the above forms of imports get mapped to the same kind of AST node:
43+ * `Import`. An `Import` node has a sequence of names, each of which is an `Alias` node. This `Alias`
44+ * node represents the `x as y` bit of each imported module.
45+ *
46+ * The same is true for `from` imports. So, how then do we distinguish between the two forms of
47+ * imports? The distinguishing feature is the left hand side of the `as` node. If the left hand side
48+ * is an `ImportExpr`, then it is a plain import. If it is an `ImportMember`, then it is a `from`
49+ * import. (And to confuse matters even more, this `ImportMember` contains another `ImportExpr` for
50+ * the bit between the `from` and `import` keywords.)
51+ *
52+ * Caveats:
53+ *
54+ * - A relative import of the form `from .foo import bar as baz` not only imports `bar` and binds it
55+ * to the name `baz`, but also imports `foo` and binds it to the name `foo`. This only happens with
56+ * relative imports. `from foo import bar as baz` only binds `bar` to `baz`.
57+ * - Modules may also be packages, so e.g. `import foo.bar` may import the `bar` submodule in the `foo`
58+ * package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
59+ * the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
60+ * corresponding to the fact that the code that is executed is in the `__init__.py` file of the
61+ * `bar` package.
62+ */
663module ImportResolution {
764 /**
865 * Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
966 * overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
1067 * not include `name`).
1168 */
69+ pragma [ nomagic]
1270 predicate module_export ( Module m , string name , DataFlow:: CfgNode defn ) {
1371 exists ( EssaVariable v |
1472 v .getName ( ) = name and
@@ -18,12 +76,216 @@ module ImportResolution {
1876 or
1977 defn .getNode ( ) = v .getDefinition ( ) .( ArgumentRefinement ) .getArgument ( )
2078 )
79+ or
80+ exists ( Alias a |
81+ defn .asExpr ( ) = [ a .getValue ( ) , a .getValue ( ) .( ImportMember ) .getModule ( ) ] and
82+ a .getAsname ( ) .( Name ) .getId ( ) = name and
83+ defn .getScope ( ) = m
84+ )
85+ }
86+
/**
 * Holds if `name` is the text of a string constant that is an element of a sequence assigned to
 * `__all__` in the module `m`, i.e. `m` lists `name` explicitly in `__all__`.
 *
 * Only the simple, statically visible case is covered: a definition of `__all__` in the scope of
 * `m` whose assigned value is a sequence containing string constants.
 */
private predicate all_mentions_name(Module m, string name) {
  exists(DefinitionNode all_defn, SequenceNode exported, StrConst entry |
    // A definition of the name `__all__` directly in the scope of `m` ...
    all_defn.getScope() = m and
    all_defn.(NameNode).getId() = "__all__" and
    // ... whose assigned value is a sequence ...
    exported = all_defn.getValue() and
    // ... with a string-constant element spelling out `name`.
    entry = exported.getAnElement().getNode() and
    entry.getText() = name
  )
}
99+
/**
 * Holds if the module `m` should be treated as having no usable `__all__`. This is the case if
 * any of the following holds:
 *
 * - `m` contains no definition of `__all__` at all,
 * - `m` contains a definition of `__all__` whose value is not a sequence, or
 * - `m` contains a load of `__all__` (typically a mutation via `append` or `extend`, which is
 *   not modelled).
 *
 * For such modules, callers fall back to treating names as implicitly exported.
 */
private predicate no_or_complicated_all(Module m) {
  // Case 1: `__all__` is never defined in `m`.
  not exists(DefinitionNode all_defn |
    all_defn.(NameNode).getId() = "__all__" and
    all_defn.getScope() = m
  )
  or
  // Case 2: some definition of `__all__` in `m` assigns something other than a sequence.
  exists(DefinitionNode all_defn |
    all_defn.getScope() = m and
    all_defn.(NameNode).getId() = "__all__" and
    not all_defn.getValue() instanceof SequenceNode
  )
  or
  // Case 3: `__all__` is read somewhere in `m`, rather than only being stored to.
  any(NameNode all_use | all_use.getId() = "__all__" and all_use.getScope() = m).isLoad()
}
120+
/**
 * Holds if the module `m` may export the name `name`.
 *
 * This is the case if `name` is listed in `__all__` (see `all_mentions_name`), or if `m` has no
 * usable `__all__` (see `no_or_complicated_all`) and either
 *
 * - `name` does not start with an underscore and is the identifier of some name node in the
 *   scope of `m`, or
 * - `name` is the `as`-name of an import alias whose value lies in the scope of `m`.
 *
 * Note that the leading-underscore restriction is only applied to the first of these two cases.
 */
private predicate potential_module_export(Module m, string name) {
  // Explicitly listed in `__all__`.
  all_mentions_name(m, name)
  or
  // Implicit export of a non-underscore name mentioned in the module.
  no_or_complicated_all(m) and
  name.charAt(0) != "_" and
  exists(NameNode mention | mention.getScope() = m and mention.getId() = name)
  or
  // Implicit export of a name bound by an import alias in the module.
  no_or_complicated_all(m) and
  exists(Alias alias, Name bound |
    bound = alias.getAsname() and
    bound.getId() = name and
    alias.getValue().getScope() = m
  )
}
131+
/**
 * Holds if the module `reexporter` makes the module `reexported` available under the name
 * `reexported_name`.
 *
 * This requires that `reexporter` defines `reexported_name` from an immediate reference to
 * `reexported` (via `module_export`), and that `reexported_name` is a potential export of
 * `reexporter` (via `potential_module_export`).
 */
private predicate module_reexport(Module reexporter, string reexported_name, Module reexported) {
  potential_module_export(reexporter, reexported_name) and
  module_export(reexporter, reexported_name, getImmediateModuleReference(reexported))
}
143+
/**
 * Gets a reference to `sys.modules`.
 *
 * The match is purely syntactic: any attribute reference to `modules` whose object is a plain
 * name spelled `sys` qualifies. Whether that name actually refers to the `sys` module is not
 * checked here.
 */
private DataFlow::Node sys_modules_reference() {
  exists(DataFlow::AttrRef attr, Name base |
    base = attr.getObject().asExpr() and
    base.getId() = "sys" and
    attr.getAttributeName() = "modules" and
    result = attr
  )
}
153+
/**
 * Gets a module that may have been added to `sys.modules` under the key `name`.
 *
 * This recognizes stores of the form `sys.modules["<name>"] = <value>`, where the subscript index
 * is a string constant and `<value>` is a reference to the resulting module (as computed by
 * `getModuleReference`).
 */
private Module sys_modules_module_with_name(string name) {
  exists(SubscriptNode store, StrConst key, DataFlow::Node assigned |
    // The subscripted object is `sys.modules` ...
    store.getObject() = sys_modules_reference().asCfgNode() and
    // ... indexed by the string constant `name` ...
    key = store.getIndex().getNode() and
    key.getText() = name and
    // ... and the value being stored is a reference to the module.
    assigned.asCfgNode() = store.(DefinitionNode).getValue() and
    assigned = getModuleReference(result)
  )
}
22166
23167 Module getModule ( DataFlow:: CfgNode node ) {
24168 exists ( ModuleValue mv |
25169 node .getNode ( ) .pointsTo ( mv ) and
26170 result = mv .getScope ( )
/**
 * Gets a module imported by the `ImportStar` node `i`: a module whose file satisfies
 * `isPreferredModuleForName` for the module name that `i` imports.
 */
Module getModuleImportedByImportStar(ImportStar i) {
  exists(string imported_name | imported_name = i.getImportedModuleName() |
    isPreferredModuleForName(result.getFile(), imported_name)
  )
}
174+
/**
 * Gets a data-flow node that may be a reference to a module with the name `module_name`.
 *
 * The name is derived syntactically from import statements; no check is made here that a module
 * with that name actually exists.
 */
DataFlow::Node getReferenceToModuleName(string module_name) {
  // The bound name of a regular import statement, e.g.
  //   import foo              # implicitly `import foo as foo`
  //   import foo as foo_alias
  exists(Import imp, Alias alias, ImportExpr imported |
    alias = imp.getAName() and
    imported = alias.getValue() and
    module_name = imported.getImportedModuleName() and
    result.asExpr() = alias.getAsname()
  )
  or
  // The module part of a `from ... import ...` statement, e.g. the `..foo.bar` in
  //   from ..foo.bar import baz   # ..foo.bar might point to, say, package.subpackage.foo.bar
  exists(ImportMember member, ImportExpr source |
    source = member.getModule() and
    module_name = source.getImportedModuleName() and
    result.asExpr() = source
  )
  or
  // Modules (not attributes) imported via `from ... import ...` statements, e.g.
  //   from foo.bar import baz               # imports foo.bar.baz as baz
  //   from foo.bar import baz as baz_alias  # imports foo.bar.baz as baz_alias
  exists(Import imp, Alias alias, ImportMember member, ImportExpr source |
    imp.isFromImport() and
    alias = imp.getAName() and
    member = alias.getValue() and
    source = member.getModule() and
    module_name = source.getImportedModuleName() + "." + member.getName() and
    result.asExpr() = alias.getAsname()
  )
  or
  // For parity with the points-to based solution, the `ImportExpr` and `ImportMember` nodes of
  // the cases above are themselves also treated as references.
  exists(ImportExpr imported |
    imported.getImportedModuleName() = module_name and
    result.asExpr() = imported
  )
  or
  exists(ImportMember member, ImportExpr source |
    source = member.getModule() and
    result.asExpr() = member
  |
    source.getImportedModuleName() = module_name
    or
    // This alternative is deliberately switched off by the trailing `none()`.
    source.getImportedModuleName() + "." + member.getName() = module_name and
    none()
  )
}
212+
/**
 * Gets a dataflow node that is an immediate reference to the module `m`.
 *
 * A node is an immediate reference to `m` if one of the following holds:
 *
 * 1. It is a reference to some module name (see `getReferenceToModuleName`), and either that
 *    name (possibly with `.__init__` appended) is the preferred name for the file of `m`, or
 *    `m` was stored in `sys.modules` under that name.
 * 2. It reads an attribute from a reference to a module `p`, where the attribute name is the
 *    stem of some module's file, and either `m` is the correspondingly named submodule or
 *    subpackage of `p`, or `p` reexports `m` under that attribute name.
 * 3. It is the variable that `SsaSource::init_module_submodule_defn` associates with the entry
 *    node of a module `package`, and `m` is the submodule or subpackage of `package` with the
 *    same name as that variable.
 */
DataFlow::Node getImmediateModuleReference(Module m) {
  exists(string module_name | result = getReferenceToModuleName(module_name) |
    // Depending on whether the referenced module is a package or not, we may need to add a
    // trailing `.__init__` to the module name.
    isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
    or
    // Module defined via `sys.modules`
    m = sys_modules_module_with_name(module_name)
  )
  or
  // Reading an attribute on a module may return a submodule (or subpackage).
  exists(DataFlow::AttrRead ar, Module p, string attr_name |
    ar.getObject() = getModuleReference(p) and
    // Restrict the attribute names considered to names that could be a module file at all.
    attr_name = any(Module m0).getFile().getStem() and
    ar.getAttributeName() = attr_name and
    result = ar
  |
    isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
    or
    // This is also true for attributes that come from reexports.
    module_reexport(p, attr_name, m)
  )
  or
  // Submodules that are implicitly defined when importing via `from ... import ...` statements.
  // In practice, we create a definition for each module in a package, even if it is not imported.
  exists(string submodule, Module package |
    // The submodule name must be the name of the variable being defined. Without this
    // constraint `submodule` is not connected to `result` at all, so the variable for one
    // submodule could not be distinguished from the variable for any other submodule of the
    // same package.
    submodule = result.asVar().getName() and
    SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
      package.getEntryNode()) and
    isPreferredModuleForName(m.getFile(),
      package.getPackageName() + "." + submodule + ["", ".__init__"])
  )
}
246+
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is a `Name` expression with identifier `name` in the scope `s`, and `node` is
 * an immediate reference to the module `m`.
 */
pragma[nomagic]
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
  exists(Name bound |
    bound = node.asExpr() and
    bound.getId() = name
  ) and
  s = node.getScope() and
  getImmediateModuleReference(pragma[only_bind_into](m)) = pragma[only_bind_into](node)
}
254+
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is a use of a `Name` expression with identifier `name` in the scope `s`.
 */
pragma[nomagic]
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name) {
  exists(Name used |
    used = node.asExpr() and
    pragma[only_bind_into](used).isUse() and
    name = used.getId()
  ) and
  s = node.getScope()
}
264+
/**
 * Gets a reference to the module `m`.
 *
 * Besides immediate references (see `getImmediateModuleReference`), this includes nodes reached
 * from an existing reference by local flow or through a module variable (a write followed by a
 * read), as well as uses of a name that is bound to `m` in the same or an enclosing scope.
 */
DataFlow::Node getModuleReference(Module m) {
  // Immediate references to the module.
  result = getImmediateModuleReference(m)
  or
  // Local flow forward from an earlier reference to the module.
  DataFlow::localFlow(getModuleReference(m), result)
  or
  // Global flow: an earlier reference is written to a module variable that is read here.
  exists(DataFlow::ModuleVariableNode module_var |
    module_var.getAWrite() = getModuleReference(m) and
    result = module_var.getARead()
  )
  or
  // A use of a name that is bound to the module in the same or an enclosing scope.
  exists(DataFlow::Node binding, Scope binding_scope, Scope reading_scope, string bound_name |
    module_name_in_scope(pragma[only_bind_into](binding), pragma[only_bind_into](binding_scope),
      pragma[only_bind_into](bound_name), pragma[only_bind_into](m)) and
    binding_scope = reading_scope.getEnclosingScope*() and
    module_reference_in_scope(result, reading_scope, bound_name)
  )
}
290+
29291}
0 commit comments