@@ -249,6 +249,8 @@ module EssaFlow {
249249 // Flow inside an unpacking assignment
250250 iterableUnpackingFlowStep ( nodeFrom , nodeTo )
251251 or
252+ matchFlowStep ( nodeFrom , nodeTo )
253+ or
252254 // Overflow keyword argument
253255 exists ( CallNode call , CallableValue callable |
254256 call = callable .getACall ( ) and
@@ -982,6 +984,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
982984 posOverflowStoreStep ( nodeFrom , c , nodeTo )
983985 or
984986 kwOverflowStoreStep ( nodeFrom , c , nodeTo )
987+ or
988+ matchStoreStep ( nodeFrom , c , nodeTo )
985989}
986990
987991/** Data flows from an element of a list to the list. */
@@ -1124,6 +1128,8 @@ predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
11241128 or
11251129 iterableUnpackingReadStep ( nodeFrom , c , nodeTo )
11261130 or
1131+ matchReadStep ( nodeFrom , c , nodeTo )
1132+ or
11271133 popReadStep ( nodeFrom , c , nodeTo )
11281134 or
11291135 forReadStep ( nodeFrom , c , nodeTo )
@@ -1553,6 +1559,290 @@ module IterableUnpacking {
15531559
15541560import IterableUnpacking
15551561
1562+ /**
1563+ * There are a number of patterns available for the match statement.
1564+ * Each one transfers data and content differently to its parts.
1565+ *
1566+ * Furthermore, given a successful match, we can infer some data about
1567+ * the subject. Consider the example:
1568+ * ```python
1569+ * match choice:
1570+ * case 'Y':
1571+ * ...body
1572+ * ```
1573+ * Inside `body`, we know that `choice` has the value `'Y'`.
1574+ *
1575+ * A similar thing happens with the "as pattern". Consider the example:
1576+ * ```python
1577+ * match choice:
1578+ * case ('y'|'Y') as c:
1579+ * ...body
1580+ * ```
1581+ * By the binding rules, there is data flow from `choice` to `c`. But we
1582+ * can infer the value of `c` to be either `'y'` or `'Y'` if the match succeeds.
1583+ *
1584+ * We will treat such inference separately as guards. First we will model the data flow
1585+ * stemming from the bindings and the matching of shape. Below, 'subject' is not necessarily the
1586+ * top-level subject of the match, but rather the part recursively matched by the current pattern.
1587+ * For instance, in the example:
1588+ * ```python
1589+ * match command:
1590+ * case ('quit' as c) | ('go', ('up'|'down') as c):
1591+ * ...body
1592+ * ```
1593+ * `command` is the subject of the first as-pattern, while the second component of `command` is the subject
1594+ * of the second as-pattern. As such, 'subject' refers to the pattern under evaluation.
1595+ *
1596+ * - as pattern: subject flows to alias as well as to the interior pattern
1597+ * - or pattern: subject flows to each alternative
1598+ * - literal pattern: no flow
1599+ * - capture pattern: subject flows to the variable
1600+ * - wildcard pattern: no flow
1601+ * - value pattern: no flow
1602+ * - sequence pattern: each element reads from subject at the associated index
1603+ * - star pattern: subject flows to the variable, possibly via a conversion
1604+ * - mapping pattern: each value reads from subject at the associated key
1605+ * - double star pattern: subject flows to the variable, possibly via a conversion
1606+ * - key-value pattern: the value reads from the subject at the key (see mapping pattern)
1607+ * - class pattern: all keywords read the appropriate attribute from the subject
1608+ * - keyword pattern: the appropriate attribute is read from the subject (see class pattern)
1609+ *
1610+ * Inside the class pattern, we also find positional arguments. They are converted to
1611+ * keyword arguments using the `__match_args__` attribute on the class. We do not
1612+ * currently model this.
1613+ */
module MatchUnpacking {
  /**
   * Holds if `nodeFrom` is the subject expression of a `match` statement and
   * `nodeTo` is the pattern directly under one of its `case` clauses.
   *
   * Every top-level pattern receives the subject directly. An alternative
   * model, closer to use-use flow, would let the subject flow only to the
   * first top-level pattern and from there on to the following ones.
   */
  predicate matchSubjectFlowStep(Node nodeFrom, Node nodeTo) {
    exists(MatchStmt stmt, Expr matched, Pattern casePattern |
      matched = stmt.getSubject() and
      casePattern = stmt.getCase(_).(Case).getPattern() and
      matched = nodeFrom.asExpr() and
      casePattern = nodeTo.asCfgNode().getNode()
    )
  }

  /**
   * as pattern: the value matched by the as-pattern flows both to the alias
   * and to the interior pattern.
   * syntax (toplevel): `case pattern as alias:`
   */
  predicate matchAsFlowStep(Node nodeFrom, Node nodeTo) {
    exists(MatchAsPattern asPattern, Name boundName |
      boundName = asPattern.getAlias() and
      asPattern = nodeFrom.asCfgNode().getNode()
    |
      // binding: the matched value is assigned to the alias variable
      exists(PatternAliasDefinition aliasDef |
        aliasDef = nodeTo.asVar().getDefinition() and
        boundName = aliasDef.getDefiningNode().getNode()
      )
      or
      // recursion: the matched value is also matched against the interior pattern
      asPattern.getPattern() = nodeTo.asCfgNode().getNode()
    )
  }

  /**
   * or pattern: the value matched by the or-pattern flows to each alternative.
   * syntax (toplevel): `case alt1 | alt2:`
   */
  predicate matchOrFlowStep(Node nodeFrom, Node nodeTo) {
    exists(MatchOrPattern orPattern, Pattern alternative |
      alternative = orPattern.getAPattern() and
      orPattern = nodeFrom.asCfgNode().getNode() and
      alternative = nodeTo.asCfgNode().getNode()
    )
  }

  /**
   * capture pattern: the value matched by the capture pattern flows to the
   * variable it binds.
   * syntax (toplevel): `case var:`
   */
  predicate matchCaptureFlowStep(Node nodeFrom, Node nodeTo) {
    exists(MatchCapturePattern capturePattern, Name boundName |
      boundName = capturePattern.getVariable() and
      capturePattern = nodeFrom.asCfgNode().getNode()
    |
      exists(PatternCaptureDefinition captureDef |
        captureDef = nodeTo.asVar().getDefinition() and
        boundName = captureDef.getDefiningNode().getNode()
      )
    )
  }

  /**
   * sequence pattern: each element pattern reads from the matched value at
   * the element's position in the pattern.
   * syntax (toplevel): `case [a, b]:`
   *
   * List content is read regardless of position; tuple content is read at
   * the index equal to the element's position in the sequence pattern.
   *
   * NOTE(review): for element patterns that come after a star pattern, the
   * position in the pattern need not equal the index in the matched tuple -
   * confirm whether this imprecision is intended.
   */
  predicate matchSequenceReadStep(Node nodeFrom, Content c, Node nodeTo) {
    exists(MatchSequencePattern sequence, int position, Pattern item |
      item = sequence.getPattern(position) and
      sequence = nodeFrom.asCfgNode().getNode() and
      item = nodeTo.asCfgNode().getNode()
    |
      // list content
      c instanceof ListElementContent
      or
      // tuple content
      position = c.(TupleElementContent).getIndex()
      // set content is excluded from sequence patterns,
      // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
    )
  }

  /**
   * star pattern: the matched value flows to the starred variable, possibly
   * via a conversion.
   * syntax (inside a sequence pattern): `case [first, *rest]:`
   *
   * The flow is split into a read step followed by a store step. The read
   * step accepts both tuple and list content, whereas the store step only
   * produces list content; together they convert all content to list content.
   *
   * This is the read step: it reads from the enclosing sequence pattern into
   * the synthesized node `TStarPatternElementNode(star)`. Tuple content is
   * read at every index greater than or equal to the position of the star
   * pattern.
   *
   * NOTE(review): that range also covers tuple elements matched by patterns
   * following the star - presumably a deliberate over-approximation; confirm.
   */
  predicate matchStarReadStep(Node nodeFrom, Content c, Node nodeTo) {
    exists(MatchSequencePattern sequence, int position, MatchStarPattern star |
      star = sequence.getPattern(position) and
      sequence = nodeFrom.asCfgNode().getNode() and
      TStarPatternElementNode(star) = nodeTo
    |
      // list content
      c instanceof ListElementContent
      or
      // tuple content
      c.(TupleElementContent).getIndex() >= position
      // set content is excluded from sequence patterns,
      // see https://www.python.org/dev/peps/pep-0635/#sequence-patterns
    )
  }

  /**
   * star pattern: the matched value flows to the starred variable, possibly
   * via a conversion.
   * syntax (inside a sequence pattern): `case [first, *rest]:`
   *
   * The flow is split into a read step followed by a store step. The read
   * step accepts both tuple and list content, whereas the store step only
   * produces list content; together they convert all content to list content.
   *
   * This is the store step: it stores the synthesized node
   * `TStarPatternElementNode(star)` as list content in the target of the
   * star pattern.
   */
  predicate matchStarStoreStep(Node nodeFrom, Content c, Node nodeTo) {
    c instanceof ListElementContent and
    exists(MatchStarPattern star |
      TStarPatternElementNode(star) = nodeFrom and
      star.getTarget() = nodeTo.asCfgNode().getNode()
    )
  }

  /**
   * mapping pattern: each value pattern reads from the matched value at the
   * associated key.
   * syntax (toplevel): `case {"color": c, "height": x}:`
   *
   * Only keys given as string constants in a literal pattern are covered.
   */
  predicate matchMappingReadStep(Node nodeFrom, Content c, Node nodeTo) {
    exists(
      MatchMappingPattern mapping, MatchKeyValuePattern entry, MatchLiteralPattern keyPattern,
      Pattern valuePattern
    |
      entry = mapping.getAMapping() and
      keyPattern = entry.getKey() and
      valuePattern = entry.getValue() and
      mapping = nodeFrom.asCfgNode().getNode() and
      valuePattern = nodeTo.asCfgNode().getNode() and
      keyPattern.getLiteral().(StrConst).getText() = c.(DictionaryElementContent).getKey()
    )
  }

  /**
   * double star pattern: the matched value flows to the double-starred
   * variable, possibly via a conversion.
   * syntax (toplevel): `case {**var}:`
   *
   * Dictionary content flows along with the value; the keys mentioned
   * explicitly in the same mapping pattern are removed again by
   * `matchMappingClearStep`.
   */
  predicate matchMappingFlowStep(Node nodeFrom, Node nodeTo) {
    exists(MatchMappingPattern mapping, MatchDoubleStarPattern rest |
      rest = mapping.getAMapping() and
      mapping = nodeFrom.asCfgNode().getNode() and
      rest.getTarget() = nodeTo.asCfgNode().getNode()
    )
  }

  /**
   * Holds if `n` is the target of a double star pattern and `c` is the
   * dictionary content for a string key that is mentioned explicitly in the
   * same mapping pattern.
   *
   * Bindings that are mentioned in a mapping pattern are not available to a
   * double star pattern in that mapping pattern.
   */
  predicate matchMappingClearStep(Node n, Content c) {
    exists(
      MatchMappingPattern mapping, MatchKeyValuePattern entry, MatchLiteralPattern keyPattern,
      MatchDoubleStarPattern rest
    |
      rest = mapping.getAMapping() and
      entry = mapping.getAMapping() and
      keyPattern = entry.getKey() and
      rest.getTarget() = n.asCfgNode().getNode() and
      keyPattern.getLiteral().(StrConst).getText() = c.(DictionaryElementContent).getKey()
    )
  }

  /**
   * class pattern: each keyword pattern reads the attribute it names from
   * the matched value.
   * syntax (toplevel): `case ClassName(attr = val):`
   */
  predicate matchClassReadStep(Node nodeFrom, Content c, Node nodeTo) {
    exists(
      MatchClassPattern classPattern, MatchKeywordPattern keywordPattern, Name attrName,
      Pattern valuePattern
    |
      keywordPattern = classPattern.getKeyword(_) and
      attrName = keywordPattern.getAttribute() and
      valuePattern = keywordPattern.getValue() and
      classPattern = nodeFrom.asCfgNode().getNode() and
      valuePattern = nodeTo.asCfgNode().getNode() and
      attrName.getId() = c.(AttributeContent).getAttribute()
    )
  }

  /** All flow steps associated with match. */
  predicate matchFlowStep(Node nodeFrom, Node nodeTo) {
    matchMappingFlowStep(nodeFrom, nodeTo)
    or
    matchCaptureFlowStep(nodeFrom, nodeTo)
    or
    matchOrFlowStep(nodeFrom, nodeTo)
    or
    matchAsFlowStep(nodeFrom, nodeTo)
    or
    matchSubjectFlowStep(nodeFrom, nodeTo)
  }

  /** All read steps associated with match. */
  predicate matchReadStep(Node nodeFrom, Content c, Node nodeTo) {
    matchStarReadStep(nodeFrom, c, nodeTo)
    or
    matchMappingReadStep(nodeFrom, c, nodeTo)
    or
    matchSequenceReadStep(nodeFrom, c, nodeTo)
    or
    matchClassReadStep(nodeFrom, c, nodeTo)
  }

  /** All store steps associated with match. */
  predicate matchStoreStep(Node nodeFrom, Content c, Node nodeTo) {
    matchStarStoreStep(nodeFrom, c, nodeTo)
  }

  /** All clear steps associated with match. */
  predicate matchClearStep(Node n, Content c) {
    matchMappingClearStep(n, c)
  }
}
1843+
1844+ import MatchUnpacking
1845+
15561846/** Data flows from a sequence to a call to `pop` on the sequence. */
15571847predicate popReadStep ( CfgNode nodeFrom , Content c , CfgNode nodeTo ) {
15581848 // set.pop or list.pop
@@ -1635,18 +1925,28 @@ predicate kwUnpackReadStep(CfgNode nodeFrom, DictionaryElementContent c, Node no
16351925}
16361926
/**
 * Holds if `n` is the synthesized dictionary `TKwOverflowNode(call, callable)`
 * and `c` is the dictionary content at a key `name` that `call` unpacks.
 * Content at such keys is cleared from the synthesized dictionary.
 */
predicate kwOverflowClearStep(Node n, Content c) {
  exists(CallNode call, CallableValue callable, string name |
    n = TKwOverflowNode(call, callable) and
    call_unpacks(call, _, callable, name, _) and
    name = c.(DictionaryElementContent).getKey()
  )
}
16491938
/**
 * Holds if values stored inside content `c` are cleared at node `n`. For example,
 * any value stored inside `f` is cleared at the pre-update node associated with `x`
 * in `x.f = newValue`.
 *
 * The clear steps combined here are those for `match` statements
 * (`matchClearStep`) and those for keyword-overflow dictionaries
 * (`kwOverflowClearStep`).
 */
predicate clearsContent(Node n, Content c) {
  matchClearStep(n, c)
  or
  kwOverflowClearStep(n, c)
}
1949+
16501950//--------
16511951// Fancy context-sensitive guards
16521952//--------
0 commit comments