@@ -866,6 +866,133 @@ private module Stdlib {
866866 }
867867}
868868
869+ // ---------------------------------------------------------------------------
870+ // hashlib
871+ // ---------------------------------------------------------------------------
/**
 * Gets a call to `hashlib.new` whose algorithm-name argument (the first positional
 * argument, or the keyword argument `name`) receives, via local flow, a string
 * constant whose text is `algorithmName`.
 */
private DataFlow::CallCfgNode hashlibNewCall(string algorithmName) {
  result = API::moduleImport("hashlib").getMember("new").getACall() and
  exists(StrConst nameLiteral, DataFlow::Node nameArg |
    // the algorithm name can be given positionally or as `name=...`
    nameArg = result.getArg(0)
    or
    nameArg = result.getArgByName("name")
  |
    algorithmName = nameLiteral.getText() and
    DataFlow::exprNode(nameLiteral).(DataFlow::LocalSourceNode).flowsTo(nameArg)
  )
}
883+
/**
 * Gets a local source node that the result of a `hashlib.new` call made with
 * algorithm name `algorithmName` has been tracked to, with `t` as the type tracker
 * describing the steps taken so far.
 */
private DataFlow::LocalSourceNode hashlibNewResult(DataFlow::TypeTracker t, string algorithmName) {
  // Base case: tracking starts at the `hashlib.new` call itself.
  result = hashlibNewCall(algorithmName) and
  t.start()
  or
  // Step case. The normal setup, `hashlibNewResult(t2, algorithmName).track(t2, t)`,
  // gave bad performance, so that code is inlined here and the join is forced
  // through the helper predicate `hashlibNewResult_first_join`.
  exists(DataFlow::TypeTracker t2, DataFlow::StepSummary summary |
    t = t2.append(summary) and
    hashlibNewResult_first_join(t2, algorithmName, result, summary)
  )
}
898+
/**
 * Holds if `DataFlow::StepSummary::step` relates a node from
 * `hashlibNewResult(t2, algorithmName)` to `res` with step summary `summary`.
 *
 * Helper for the type-tracking predicate `hashlibNewResult`; it is kept as a separate
 * `pragma[nomagic]` predicate so that this join is evaluated on its own.
 */
pragma[nomagic]
private predicate hashlibNewResult_first_join(
  DataFlow::TypeTracker t2, string algorithmName, DataFlow::Node res,
  DataFlow::StepSummary summary
) {
  exists(DataFlow::LocalSourceNode tracked |
    tracked = hashlibNewResult(t2, algorithmName)
  |
    DataFlow::StepSummary::step(tracked, res, summary)
  )
}
905+
/**
 * Gets a node that refers to the result of a `hashlib.new` call made with algorithm
 * name `algorithmName`, i.e. a node reached by local flow from a fully tracked
 * (`DataFlow::TypeTracker::end()`) source.
 */
DataFlow::Node hashlibNewResult(string algorithmName) {
  exists(DataFlow::LocalSourceNode trackedSource |
    trackedSource = hashlibNewResult(DataFlow::TypeTracker::end(), algorithmName)
  |
    trackedSource.flowsTo(result)
  )
}
910+
/**
 * A hashing operation performed by passing initial data directly in a call to
 * `hashlib.new`, either as the second positional argument or as the keyword
 * argument `data`.
 */
class HashlibNewCall extends Cryptography::CryptographicOperation::Range, DataFlow::CallCfgNode {
  /** The algorithm name passed to `hashlib.new`. */
  string hashName;

  HashlibNewCall() {
    this = hashlibNewCall(hashName) and
    // only calls that actually supply data count as a hashing operation
    (
      exists(this.getArg(1))
      or
      exists(this.getArgByName("data"))
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }

  override DataFlow::Node getAnInput() {
    result = this.getArg(1)
    or
    result = this.getArgByName("data")
  }
}
926+
/**
 * A hashing operation performed by calling the `update` method on a reference to the
 * result of a `hashlib.new` call.
 */
class HashlibNewUpdateCall extends Cryptography::CryptographicOperation::Range,
  DataFlow::CallCfgNode {
  /** The algorithm name that was passed to the originating `hashlib.new` call. */
  string hashName;

  HashlibNewUpdateCall() {
    exists(DataFlow::AttrRead updateAttr |
      updateAttr.getAttributeName() = "update" and
      updateAttr.getObject() = hashlibNewResult(hashName) and
      updateAttr = this.getFunction()
    )
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }

  /** Gets the first positional argument of the `update` call. */
  override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
946+
/**
 * A hashing operation from the `hashlib` package that uses one of the predefined
 * classes (such as `hashlib.md5`).
 *
 * `hashlib.new` is deliberately excluded, since it is modeled by `HashlibNewCall` and
 * `HashlibNewUpdateCall`.
 */
abstract class HashlibGenericHashOperation extends Cryptography::CryptographicOperation::Range,
  DataFlow::CallCfgNode {
  /** The name of the `hashlib` member used, for example `md5`. */
  string hashName;
  /** The API node for `hashlib.<hashName>`. */
  API::Node hashClass;

  // This characteristic predicate only binds the fields; the concrete subclasses are
  // the ones that relate `this` to `hashClass`.
  bindingset[this]
  HashlibGenericHashOperation() {
    hashClass = API::moduleImport("hashlib").getMember(hashName) and
    hashName != "new"
  }

  override Cryptography::CryptographicAlgorithm getAlgorithm() { result.matchesName(hashName) }
}
965+
/**
 * A hashing operation from the `hashlib` package using one of the predefined classes
 * (such as `hashlib.md5`), performed by calling its `update` method.
 */
class HashlibHashClassUpdateCall extends HashlibGenericHashOperation {
  HashlibHashClassUpdateCall() {
    exists(API::Node updateMethod |
      updateMethod = hashClass.getReturn().getMember("update")
    |
      this = updateMethod.getACall()
    )
  }

  /** Gets the first positional argument of the `update` call. */
  override DataFlow::Node getAnInput() { result = this.getArg(0) }
}
975+
/**
 * A hashing operation from the `hashlib` package using one of the predefined classes
 * (such as `hashlib.md5`), performed by passing data when instantiating the class.
 */
class HashlibDataPassedToHashClass extends HashlibGenericHashOperation {
  HashlibDataPassedToHashClass() {
    this = hashClass.getACall() and
    // calls such as `hashlib.md5()` are only modeled when initial data is passed
    // as an argument
    (
      exists(this.getArg(0))
      or
      exists(this.getArgByName("string"))
    )
  }

  override DataFlow::Node getAnInput() {
    // The data is either the first positional argument or the keyword argument
    // `string`; in Python 3.9, you are allowed to use `hashlib.md5(string=<bytes-like>)`.
    result in [this.getArg(0), this.getArgByName("string")]
  }
}
995+
869996// ---------------------------------------------------------------------------
870997// OTHER
871998// ---------------------------------------------------------------------------
0 commit comments