Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 10fdc4b

Browse files
committed
Python: Add support for more yaml loading functions
1 parent 2021cdb commit 10fdc4b

3 files changed

Lines changed: 36 additions & 13 deletions

File tree

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lgtm,codescanning
2+
* Improved modeling for the `PyYAML` PyPI package (imported as `yaml`), now supporting `safe_load`, `unsafe_load`, and `full_load` (as well as the `..._load_all` functions). In the current version of PyYAML (5.4.1), only `safe_load` is known to not be safe from code execution exploits. Calls to the other functions are now modeled as sinks of the _Deserializing untrusted input_ (`py/unsafe-deserialization`) query.

python/ql/src/semmle/python/frameworks/Yaml.qll

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ private module Yaml {
2929
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
3030
*/
3131
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
32-
attr_name in ["load", "SafeLoader", "BaseLoader"] and
32+
attr_name in [
33+
// functions
34+
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
35+
"safe_load", "safe_load_all",
36+
// Classes
37+
"SafeLoader", "BaseLoader"
38+
] and
3339
(
3440
t.start() and
3541
result = DataFlow::importNode("yaml." + attr_name)
@@ -68,13 +74,22 @@ private module Yaml {
6874
}
6975

7076
/**
71-
* A call to `yaml.load`
77+
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
78+
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
79+
*
7280
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
7381
*/
7482
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
7583
override CallNode node;
84+
string func_name;
7685

77-
YamlLoadCall() { node.getFunction() = Yaml::yaml::yaml_attr("load").asCfgNode() }
86+
YamlLoadCall() {
87+
func_name in [
88+
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
89+
"safe_load", "safe_load_all"
90+
] and
91+
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
92+
}
7893

7994
/**
8095
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
@@ -84,10 +99,16 @@ private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
8499
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
85100
*/
86101
override predicate mayExecuteInput() {
102+
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
103+
or
104+
func_name in ["load", "load_all"] and
87105
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
88106
// then the default loader will be used, which is not safe.
89-
not node.getArgByName("Loader") =
90-
Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]).asCfgNode()
107+
not exists(DataFlow::Node loader_arg |
108+
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
109+
|
110+
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
111+
)
91112
}
92113

93114
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }

python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
from yaml import SafeLoader
33

44
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
5-
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML SPURIOUS: decodeMayExecuteInput
5+
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
66
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
77
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
88

9-
yaml.safe_load(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
10-
yaml.unsafe_load(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
11-
yaml.full_load(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
9+
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
10+
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
11+
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
1212

13-
yaml.load_all(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
14-
yaml.safe_load_all(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
15-
yaml.unsafe_load_all(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
16-
yaml.full_load_all(payload) # $ MISSING: decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
13+
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
14+
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
15+
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
16+
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput

0 commit comments

Comments
 (0)