Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 66f5d0d

Browse files
committed
Python: Model encoding/decoding with base64 module
1 parent eff244d commit 66f5d0d

5 files changed

Lines changed: 162 additions & 0 deletions

File tree

python/ql/src/experimental/semmle/python/frameworks/Stdlib.qll

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,131 @@ private class OpenCall extends FileSystemAccess::Range, DataFlow::CfgNode {
753753
}
754754
}
755755

756+
// ---------------------------------------------------------------------------
757+
// base64
758+
// ---------------------------------------------------------------------------
759+
/**
 * Gets a reference to the `base64` module, where `t` is the type-tracker
 * state describing how the reference was reached.
 */
760+
private DataFlow::Node base64(DataFlow::TypeTracker t) {
761+
// Base case: tracking starts at the import node for `base64`.
t.start() and
762+
result = DataFlow::importNode("base64")
763+
or
764+
// Recursive case: extend an already tracked reference (state `t2`) to state `t`.
exists(DataFlow::TypeTracker t2 | result = base64(t2).track(t2, t))
765+
}
766+
767+
/**
 * Gets a reference to the `base64` module.
 * This is the tracked predicate `base64(t)` evaluated at the end type-tracker
 * state `DataFlow::TypeTracker::end()`.
 */
768+
DataFlow::Node base64() { result = base64(DataFlow::TypeTracker::end()) }
769+
770+
/**
771+
* Gets a reference to the attribute `attr_name` of the `base64` module.
772+
* WARNING: Only holds for a few predefined attributes.
*
* `t` is the type-tracker state describing how the reference was reached.
* The recursive step is written out by hand via `base64_attr_first_join`
* instead of using `.track(t2, t)`; see the performance comment in the body.
773+
*/
774+
private DataFlow::Node base64_attr(DataFlow::TypeTracker t, string attr_name) {
775+
// Only the encode/decode functions listed here are tracked.
attr_name in ["b64encode", "b64decode", "standard_b64encode", "standard_b64decode",
776+
"urlsafe_b64encode", "urlsafe_b64decode", "b32encode", "b32decode", "b16encode",
777+
"b16decode", "encodestring", "decodestring", "a85encode", "a85decode", "b85encode",
778+
"b85decode", "encodebytes", "decodebytes"] and
779+
(
780+
// Start case 1: the import node for the qualified name `base64.<attr_name>`.
t.start() and
781+
result = DataFlow::importNode("base64" + "." + attr_name)
782+
or
783+
// Start case 2: begin tracking in attribute `attr_name` of a reference to the `base64` module.
t.startInAttr(attr_name) and
784+
result = base64()
785+
)
786+
or
787+
// Due to bad performance when using normal setup with `base64_attr(t2, attr_name).track(t2, t)`
788+
// we have inlined that code and forced a join
789+
exists(DataFlow::TypeTracker t2 |
790+
exists(DataFlow::StepSummary summary |
791+
base64_attr_first_join(t2, attr_name, result, summary) and
792+
// The new tracker state is the previous state extended with the step just taken.
t = t2.append(summary)
793+
)
794+
)
795+
}
796+
797+
/**
 * Holds if `res` is reached from a node `base64_attr(t2, attr_name)` by one
 * `DataFlow::StepSummary::step` described by `summary`.
 *
 * Helper for the recursive case of `base64_attr(t, attr_name)`. It is kept as a
 * separate `pragma[nomagic]` predicate to force the join mentioned in the
 * performance comment there.
 */
pragma[nomagic]
798+
private predicate base64_attr_first_join(
799+
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
800+
) {
801+
DataFlow::StepSummary::step(base64_attr(t2, attr_name), res, summary)
802+
}
803+
804+
/**
805+
* Gets a reference to the attribute `attr_name` of the `base64` module.
806+
* WARNING: Only holds for a few predefined attributes.
*
* This is the tracked predicate `base64_attr(t, attr_name)` evaluated at the
* end type-tracker state `DataFlow::TypeTracker::end()`.
807+
*/
808+
private DataFlow::Node base64_attr(string attr_name) {
809+
result = base64_attr(DataFlow::TypeTracker::end(), attr_name)
810+
}
811+
812+
/**
 * A call to any of the encode functions in the `base64` module.
 *
 * The callee must be a tracked reference to one of the encode attributes listed
 * in the characteristic predicate. `getFormat` reports the encoding name
 * ("Base64", "Base32", "Base16", "Ascii85" or "Base85") based on which function
 * is called.
 */
813+
private class Base64EncodeCall extends Encoding::Range, DataFlow::CfgNode {
814+
override CallNode node;
815+
816+
Base64EncodeCall() {
817+
exists(string name |
818+
name in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "b32encode", "b16encode",
819+
"encodestring", "a85encode", "b85encode", "encodebytes"] and
820+
node.getFunction() = base64_attr(name).asCfgNode()
821+
)
822+
}
823+
824+
// The data to encode is taken to be the argument at index 0 of the call.
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
825+
826+
// The call node itself is the encoded output.
override DataFlow::Node getOutput() { result = this }
827+
828+
// Maps the name of the called function to the name of the encoding format.
override string getFormat() {
829+
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
830+
name in ["b64encode", "standard_b64encode", "urlsafe_b64encode", "encodestring", "encodebytes"] and
831+
result = "Base64"
832+
or
833+
name = "b32encode" and result = "Base32"
834+
or
835+
name = "b16encode" and result = "Base16"
836+
or
837+
name = "a85encode" and result = "Ascii85"
838+
or
839+
name = "b85encode" and result = "Base85"
840+
)
841+
}
842+
}
843+
844+
/**
 * A call to any of the decode functions in the `base64` module.
 *
 * The callee must be a tracked reference to one of the decode attributes listed
 * in the characteristic predicate. `getFormat` reports the encoding name
 * ("Base64", "Base32", "Base16", "Ascii85" or "Base85") based on which function
 * is called.
 */
845+
private class Base64DecodeCall extends Decoding::Range, DataFlow::CfgNode {
846+
override CallNode node;
847+
848+
Base64DecodeCall() {
849+
exists(string name |
850+
name in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "b32decode", "b16decode",
851+
"decodestring", "a85decode", "b85decode", "decodebytes"] and
852+
node.getFunction() = base64_attr(name).asCfgNode()
853+
)
854+
}
855+
856+
// `none()`: this predicate never holds for the base64 decode calls modelled here.
override predicate mayExecuteInput() { none() }
857+
858+
// The data to decode is taken to be the argument at index 0 of the call.
override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }
859+
860+
// The call node itself is the decoded output.
override DataFlow::Node getOutput() { result = this }
861+
862+
// Maps the name of the called function to the name of the encoding format.
override string getFormat() {
863+
exists(string name | node.getFunction() = base64_attr(name).asCfgNode() |
864+
name in ["b64decode", "standard_b64decode", "urlsafe_b64decode", "decodestring", "decodebytes"] and
865+
result = "Base64"
866+
or
867+
name = "b32decode" and result = "Base32"
868+
or
869+
name = "b16decode" and result = "Base16"
870+
or
871+
name = "a85decode" and result = "Ascii85"
872+
or
873+
name = "b85decode" and result = "Base85"
874+
)
875+
}
876+
}
877+
878+
// ---------------------------------------------------------------------------
879+
// OTHER
880+
// ---------------------------------------------------------------------------
756881
/**
757882
* A call to the `startswith` method on a string.
758883
* See https://docs.python.org/3.9/library/stdtypes.html#str.startswith
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import base64
2+
3+
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py
# NOTE(review): a85decode/b85decode (added in Python 3.4) and decodebytes (added in
# Python 3.1) do not exist in Python 2 -- presumably why they are tested separately; confirm.
4+
base64.a85decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Ascii85
5+
base64.b85decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base85
6+
base64.decodebytes(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import base64
2+
3+
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/test_string.py
# NOTE(review): a85encode/b85encode (added in Python 3.4) and encodebytes (added in
# Python 3.1) do not exist in Python 2 -- presumably why they are tested separately; confirm.
4+
base64.a85encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Ascii85
5+
base64.b85encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base85
6+
base64.encodebytes(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
import pickle
22
import marshal
3+
import base64
34

45
pickle.loads(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=pickle $decodeMayExecuteInput
56
marshal.loads(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=marshal $decodeMayExecuteInput
7+
8+
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
9+
base64.b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
10+
base64.standard_b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
11+
base64.urlsafe_b64decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
12+
base64.b32decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base32
13+
base64.b16decode(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base16
14+
# deprecated since Python 3.1 and removed in Python 3.9 (use decodebytes instead)
15+
base64.decodestring(payload) # $decodeInput=payload $decodeOutput=Attribute() $decodeFormat=Base64
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import pickle
2+
import marshal
3+
import base64
4+
5+
pickle.dumps(obj) # $f-:encodeInput=obj $f-:encodeOutput=Attribute() $f-:encodeFormat=pickle $f-:encodeMayExecuteInput
6+
marshal.dumps(obj) # $f-:encodeInput=obj $f-:encodeOutput=Attribute() $f-:encodeFormat=marshal $f-:encodeMayExecuteInput
7+
8+
# TODO: These tests should be merged with python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/test_string.py
9+
base64.b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
10+
base64.standard_b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
11+
base64.urlsafe_b64encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64
12+
base64.b32encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base32
13+
base64.b16encode(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base16
14+
# deprecated since Python 3.1 and removed in Python 3.9 (use encodebytes instead)
15+
base64.encodestring(bs) # $encodeInput=bs $encodeOutput=Attribute() $encodeFormat=Base64

0 commit comments

Comments
 (0)