11import python
2- import semmle.python.objects.ObjectInternal
2+ deprecated import semmle.python.objects.ObjectInternal as OI
3+ private import semmle.python.ApiGraphs
34
4- private predicate re_module_function ( string name , int flags ) {
5- name = "compile" and flags = 1
5+ /**
6+ * Gets the positional argument index containing the regular expression flags for the member of the
7+ * `re` module with the name `name`.
8+ */
9+ private int re_member_flags_arg ( string name ) {
10+ name = "compile" and result = 1
611 or
7- name = "search" and flags = 2
12+ name = "search" and result = 2
813 or
9- name = "match" and flags = 2
14+ name = "match" and result = 2
1015 or
11- name = "split" and flags = 3
16+ name = "split" and result = 3
1217 or
13- name = "findall" and flags = 2
18+ name = "findall" and result = 2
1419 or
15- name = "finditer" and flags = 2
20+ name = "finditer" and result = 2
1621 or
17- name = "sub" and flags = 4
22+ name = "sub" and result = 4
1823 or
19- name = "subn" and flags = 4
24+ name = "subn" and result = 4
2025}
2126
2227/**
23- * Gets the names and corresponding values of attributes of the `re` module that are likely to be
28+ * Gets the names and corresponding API nodes of members of the `re` module that are likely to be
2429 * methods taking regular expressions as arguments.
2530 *
2631 * This is a helper predicate that fixes a bad join order, and should not be inlined without checking
2732 * that this is safe.
2833 */
2934pragma [ nomagic]
30- private Value relevant_re_attr ( string name ) {
31- result = Module :: named ( "re" ) .attr ( name ) and
35+ private API :: Node relevant_re_member ( string name ) {
36+ result = API :: moduleImport ( "re" ) .getMember ( name ) and
3237 name != "escape"
3338}
3439
@@ -39,24 +44,78 @@ private Value relevant_re_attr(string name) {
3944predicate used_as_regex ( Expr s , string mode ) {
4045 ( s instanceof Bytes or s instanceof Unicode ) and
4146 /* Call to re.xxx(regex, ... [mode]) */
42- exists ( CallNode call , string name |
43- call .getArg ( 0 ) .pointsTo ( _ , _ , s . getAFlowNode ( ) ) and
44- call . getFunction ( ) . pointsTo ( relevant_re_attr ( name ) )
47+ exists ( DataFlow :: CallCfgNode call , string name |
48+ call .getArg ( 0 ) .asExpr ( ) = s and
49+ call = relevant_re_member ( name ) . getACall ( )
4550 |
4651 mode = "None"
4752 or
48- exists ( Value obj | mode = mode_from_mode_object ( obj ) |
49- exists ( int flags_arg |
50- re_module_function ( name , flags_arg ) and
51- call .getArg ( flags_arg ) .pointsTo ( obj )
52- )
53- or
54- call .getArgByName ( "flags" ) .pointsTo ( obj )
53+ mode = mode_from_node ( [ call .getArg ( re_member_flags_arg ( name ) ) , call .getArgByName ( "flags" ) ] )
54+ )
55+ }
56+
57+ /**
58+ * Gets the canonical name for the API graph node corresponding to the `re` flag `flag`. For flags
59+ * that have multiple names, we pick the long-form name as a canonical representative.
60+ */
61+ private string canonical_name ( API:: Node flag ) {
62+ result in [ "ASCII" , "IGNORECASE" , "LOCALE" , "UNICODE" , "MULTILINE" , "TEMPLATE" ] and
63+ flag = API:: moduleImport ( "re" ) .getMember ( [ result , result .prefix ( 1 ) ] )
64+ or
65+ flag = API:: moduleImport ( "re" ) .getMember ( [ "DOTALL" , "S" ] ) and result = "DOTALL"
66+ or
67+ flag = API:: moduleImport ( "re" ) .getMember ( [ "VERBOSE" , "X" ] ) and result = "VERBOSE"
68+ }
69+
70+ /**
71+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
72+ * to the `re` flag with the canonical name `flag_name`
73+ */
74+ private DataFlow:: LocalSourceNode re_flag_tracker ( string flag_name , DataFlow:: TypeTracker t ) {
75+ t .start ( ) and
76+ exists ( API:: Node flag | flag_name = canonical_name ( flag ) and result = flag .getAUse ( ) )
77+ or
78+ exists ( BinaryExprNode binop , DataFlow:: Node operand |
79+ operand .getALocalSource ( ) = re_flag_tracker ( flag_name , t .continue ( ) ) and
80+ operand .asCfgNode ( ) = binop .getAnOperand ( ) and
81+ ( binop .getOp ( ) instanceof BitOr or binop .getOp ( ) instanceof Add ) and
82+ result .asCfgNode ( ) = binop
83+ )
84+ or
85+ // Due to bad performance when using normal setup with `re_flag_tracker(t2, attr_name).track(t2, t)`
86+ // we have inlined that code and forced a join
87+ exists ( DataFlow:: TypeTracker t2 |
88+ exists ( DataFlow:: StepSummary summary |
89+ re_flag_tracker_first_join ( t2 , flag_name , result , summary ) and
90+ t = t2 .append ( summary )
5591 )
5692 )
5793}
5894
59- string mode_from_mode_object ( Value obj ) {
95+ pragma [ nomagic]
96+ private predicate re_flag_tracker_first_join (
97+ DataFlow:: TypeTracker t2 , string flag_name , DataFlow:: Node res , DataFlow:: StepSummary summary
98+ ) {
99+ DataFlow:: StepSummary:: step ( re_flag_tracker ( flag_name , t2 ) , res , summary )
100+ }
101+
102+ /**
103+ * A type tracker for regular expression flag names. Holds if the result is a node that may refer
104+ * to the `re` flag with the canonical name `flag_name`
105+ */
106+ private DataFlow:: Node re_flag_tracker ( string flag_name ) {
107+ re_flag_tracker ( flag_name , DataFlow:: TypeTracker:: end ( ) ) .flowsTo ( result )
108+ }
109+
110+ /** Gets a regular expression mode flag associated with the given data flow node. */
111+ string mode_from_node ( DataFlow:: Node node ) { node = re_flag_tracker ( result ) }
112+
113+ /**
114+ * DEPRECATED 2021-02-24 -- use `mode_from_node` instead.
115+ *
116+ * Gets a regular expression mode flag associated with the given value.
117+ */
118+ deprecated string mode_from_mode_object ( Value obj ) {
60119 (
61120 result = "DEBUG" or
62121 result = "IGNORECASE" or
@@ -67,8 +126,8 @@ string mode_from_mode_object(Value obj) {
67126 result = "VERBOSE"
68127 ) and
69128 exists ( int flag |
70- flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( ObjectInternal ) .intValue ( ) and
71- obj .( ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
129+ flag = Value:: named ( "sre_constants.SRE_FLAG_" + result ) .( OI :: ObjectInternal ) .intValue ( ) and
130+ obj .( OI :: ObjectInternal ) .intValue ( ) .bitAnd ( flag ) = flag
72131 )
73132}
74133
0 commit comments