@@ -13,91 +13,160 @@ import python
1313import semmle.python.security.TaintTracking
1414
1515
16- /** A regular expression that identifies strings that look like they represent secret data that are not passwords. */
17- private string suspiciousNonPassword ( ) {
18- result = "(?is).*(account|accnt|(?<!un)trusted).*"
19- }
20- /** A regular expression that identifies strings that look like they represent secret data that are passwords. */
21- private string suspiciousPassword ( ) {
22- result = "(?is).*(password|passwd).*"
23- }
24-
25- /** A regular expression that identifies strings that look like they represent secret data. */
26- private string suspicious ( ) {
27- result = suspiciousPassword ( ) or result = suspiciousNonPassword ( )
28- }
29-
/**
 * Provides heuristics for identifying names related to sensitive information.
 *
 * INTERNAL: Do not use directly.
 * This is copied from the JavaScript library, but should be language independent.
 */
private module HeuristicNames {
  /**
   * Gets a regular expression that identifies strings that may indicate the presence of secret
   * or trusted data.
   */
  string maybeSecret() { result = "(?is).*((?<!is)secret|(?<!un|is)trusted).*" }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence of
   * user names or other account information.
   */
  string maybeAccountInfo() {
    result = "(?is).*acc(ou)?nt.*" or
    result = "(?is).*(puid|username|userid).*"
  }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence of
   * a password or an authorization key.
   */
  string maybePassword() {
    result = "(?is).*pass(wd|word|code|phrase)(?!.*question).*" or
    result = "(?is).*(auth(entication|ori[sz]ation)?)key.*"
  }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence of
   * a certificate.
   */
  string maybeCertificate() { result = "(?is).*(cert)(?!.*(format|name)).*" }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence
   * of sensitive data, with `data` describing the kind of sensitive data involved.
   */
  string maybeSensitive(SensitiveData data) {
    result = maybeSecret() and data instanceof SensitiveData::Secret
    or
    result = maybeAccountInfo() and data instanceof SensitiveData::Id
    or
    result = maybePassword() and data instanceof SensitiveData::Password
    or
    result = maybeCertificate() and data instanceof SensitiveData::Certificate
  }

  /**
   * Gets a regular expression that identifies strings that may indicate the presence of data
   * that is hashed or encrypted, and hence rendered non-sensitive.
   *
   * The `code` alternative is guarded by `(?<!pass)` so that names containing "passcode",
   * which `maybePassword()` deliberately recognizes, are not discarded again by this pattern.
   *
   * NOTE(review): the `(?<!un)` guard only applies to the optional `en` prefix, so a name
   * such as "unencrypted" still matches through the bare `crypt` alternative. Confirm
   * whether that is intended.
   */
  string notSensitive() {
    result = "(?is).*(redact|censor|obfuscate|hash|md5|sha|((?<!un)(en))?(crypt|(?<!pass)code)).*"
  }

  /**
   * Gets the kind of sensitive data suggested by `name`, if any.
   *
   * A kind is reported when `name` matches one of the `maybeSensitive` patterns for that kind
   * and does not match the `notSensitive` pattern.
   */
  bindingset[name]
  SensitiveData getSensitiveDataForName(string name) {
    name.regexpMatch(HeuristicNames::maybeSensitive(result)) and
    not name.regexpMatch(HeuristicNames::notSensitive())
  }
}
6983
70- /** An access to a variable or property that might contain sensitive data. */
71- private class BasicSensitiveVariableAccess extends SensitiveVariableAccess {
/**
 * A taint kind for sensitive data.
 *
 * This class is abstract; its values are supplied by its subclasses, and the
 * characteristic predicate itself places no restriction on `this`.
 */
abstract class SensitiveData extends TaintKind {
  bindingset[this]
  SensitiveData() { any() }
}
7890
79- class SensitiveData extends TaintKind {
/**
 * Provides the concrete kinds of sensitive data and the taint sources that produce them.
 */
module SensitiveData {
  /** The kind of sensitive data associated with names that look like secrets. */
  class Secret extends SensitiveData {
    Secret() { this = "sensitive.data.secret" }

    override string repr() { result = "a secret" }
  }

  /** The kind of sensitive data associated with names that look like account information. */
  class Id extends SensitiveData {
    Id() { this = "sensitive.data.id" }

    override string repr() { result = "an ID" }
  }

  /** The kind of sensitive data associated with names that look like passwords. */
  class Password extends SensitiveData {
    Password() { this = "sensitive.data.password" }

    override string repr() { result = "a password" }
  }

  /** The kind of sensitive data associated with names that look like certificates. */
  class Certificate extends SensitiveData {
    Certificate() { this = "sensitive.data.certificate" }

    override string repr() { result = "a certificate or key" }
  }

  /**
   * Gets a kind of sensitive data suggested by the name of the function `f`,
   * or by the name of one of its parameters.
   */
  private SensitiveData fromFunction(FunctionObject f) {
    result = HeuristicNames::getSensitiveDataForName(f.getName())
    or
    // This is particularly to pick up methods with an argument like "password", which
    // may indicate a lookup.
    exists(string name | name = f.getFunction().getAnArg().asName().getId() |
      result = HeuristicNames::getSensitiveDataForName(name)
    )
  }

  /** A taint source of sensitive data. */
  abstract class Source extends TaintSource {
    /** Gets a human-readable description of this source. */
    abstract string repr();
  }

  /** A call to a function whose name, or parameter name, suggests it returns sensitive data. */
  private class SensitiveCallSource extends Source {
    SensitiveData data;

    SensitiveCallSource() {
      exists(FunctionObject callee | callee.getACall() = this | data = fromFunction(callee))
    }

    override predicate isSourceOf(TaintKind kind) { kind = data }

    // Lower-case article, so the description has the same form as the one
    // produced by `SensitiveVariableAccess.repr()`.
    override string repr() { result = "a call returning " + data.repr() }
  }

  /** An attribute access whose attribute name suggests that it holds sensitive data. */
  private class SensitiveVariableAccess extends Source {
    SensitiveData data;

    SensitiveVariableAccess() {
      data = HeuristicNames::getSensitiveDataForName(this.(AttrNode).getName())
    }

    override predicate isSourceOf(TaintKind kind) { kind = data }

    override string repr() { result = "an attribute or property containing " + data.repr() }
  }
}
103170
171+ // Backwards compatibility: keeps the previous top-level name `SensitiveDataSource` available as an alias of `SensitiveData::Source`.
172+ class SensitiveDataSource = SensitiveData:: Source ;
0 commit comments