11import python
2- import semmle.python.objects.ObjectInternal
2+ deprecated import semmle.python.objects.ObjectInternal as OI
3+ import semmle.python.ApiGraphs
34
4- private predicate re_module_function ( string name , int flags ) {
5- name = "compile" and flags = 1
5+ /**
6+ * Gets the positional argument index containing the regular expression flags for the member of the
7+ * `re` module with the name `name`.
8+ */
9+ private int re_member_flags_arg ( string name ) {
10+ name = "compile" and result = 1
611 or
7- name = "search" and flags = 2
12+ name = "search" and result = 2
813 or
9- name = "match" and flags = 2
14+ name = "match" and result = 2
1015 or
11- name = "split" and flags = 3
16+ name = "split" and result = 3
1217 or
13- name = "findall" and flags = 2
18+ name = "findall" and result = 2
1419 or
15- name = "finditer" and flags = 2
20+ name = "finditer" and result = 2
1621 or
17- name = "sub" and flags = 4
22+ name = "sub" and result = 4
1823 or
19- name = "subn" and flags = 4
24+ name = "subn" and result = 4
2025}
2126
2227/**
23- * Gets the names and corresponding values of attributes of the `re` module that are likely to be
28+ * Gets the names and corresponding API nodes of members of the `re` module that are likely to be
2429 * methods taking regular expressions as arguments.
2530 *
2631 * This is a helper predicate that fixes a bad join order, and should not be inlined without checking
2732 * that this is safe.
2833 */
2934pragma [ nomagic]
30- private Value relevant_re_attr ( string name ) {
31- result = Module :: named ( "re" ) .attr ( name ) and
35+ private API :: Node relevant_re_member ( string name ) {
36+ result = API :: moduleImport ( "re" ) .getMember ( name ) and
3237 name != "escape"
3338}
3439
@@ -39,24 +44,60 @@ private Value relevant_re_attr(string name) {
3944predicate used_as_regex ( Expr s , string mode ) {
4045 ( s instanceof Bytes or s instanceof Unicode ) and
4146 /* Call to re.xxx(regex, ... [mode]) */
42- exists ( CallNode call , string name |
43- call .getArg ( 0 ) .pointsTo ( _ , _ , s . getAFlowNode ( ) ) and
44- call . getFunction ( ) . pointsTo ( relevant_re_attr ( name ) )
47+ exists ( DataFlow :: CallCfgNode call , string name |
48+ call .getArg ( 0 ) .asExpr ( ) = s and
49+ call = relevant_re_member ( name ) . getACall ( )
4550 |
4651 mode = "None"
4752 or
48- exists ( Value obj | mode = mode_from_mode_object ( obj ) |
49- exists ( int flags_arg |
50- re_module_function ( name , flags_arg ) and
51- call .getArg ( flags_arg ) .pointsTo ( obj )
52- )
53- or
54- call .getArgByName ( "flags" ) .pointsTo ( obj )
55- )
53+ mode = mode_from_node ( [ call .getArg ( re_member_flags_arg ( name ) ) , call .getArgByName ( "flags" ) ] )
54+ )
55+ }
56+
57+ /**
58+ * Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
59+ * that have multiple names, we pick the long-form name as a canonical representative.
60+ */
61+ private string canonical_name ( API:: Node flag ) {
62+ result in [ "ASCII" , "IGNORECASE" , "LOCALE" , "UNICODE" , "MULTILINE" , "TEMPLATE" ] and
63+ flag = API:: moduleImport ( "re" ) .getMember ( [ result , result .prefix ( 1 ) ] )
64+ or
65+ flag = API:: moduleImport ( "re" ) .getMember ( [ "DOTALL" , "S" ] ) and result = "DOTALL"
66+ or
67+ flag = API:: moduleImport ( "re" ) .getMember ( [ "VERBOSE" , "X" ] ) and result = "VERBOSE"
68+ }
69+
70+ /**
71+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
72+ * to the `re` flag with the canonical name `flag_name`
73+ */
74+ private DataFlow:: Node re_flag_tracker ( string flag_name , DataFlow:: TypeTracker t ) {
75+ t .start ( ) and
76+ exists ( API:: Node flag | flag_name = canonical_name ( flag ) and result = flag .getAUse ( ) )
77+ or
78+ exists ( BinaryExprNode binop |
79+ ( binop .getOp ( ) instanceof BitOr or binop .getOp ( ) instanceof Add ) and
80+ binop .getAnOperand ( ) = re_flag_tracker ( flag_name , t .continue ( ) ) .asCfgNode ( ) and
81+ result .asCfgNode ( ) = binop
82+ )
83+ or
84+ exists ( DataFlow:: TypeTracker t2 , DataFlow:: Node prev | prev = re_flag_tracker ( flag_name , t2 ) |
85+ t2 = t .smallstep ( prev , result )
5686 )
5787}
5888
59- string mode_from_mode_object ( Value obj ) {
89+ /**
90+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
91+ * to the `re` flag with the canonical name `flag_name`
92+ */
93+ private DataFlow:: Node re_flag_tracker ( string flag_name ) {
94+ result = re_flag_tracker ( flag_name , DataFlow:: TypeTracker:: end ( ) )
95+ }
96+
97+ /** Gets a regular expression mode flag associated with the given data flow node. */
98+ string mode_from_node ( DataFlow:: Node node ) { node = re_flag_tracker ( result ) }
99+
100+ deprecated string mode_from_mode_object ( Value obj ) {
60101 (
61102 result = "DEBUG" or
62103 result = "IGNORECASE" or
@@ -67,8 +108,8 @@ string mode_from_mode_object(Value obj) {
67108 result = "VERBOSE"
68109 ) and
69110 exists ( int flag |
70- flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( ObjectInternal ) .intValue ( ) and
71- obj .( ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
111+ flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( OI :: ObjectInternal ) .intValue ( ) and
112+ obj .( OI :: ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
72113 )
73114}
74115
0 commit comments