Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f37eb3a

Browse files
committed
Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
svn+ssh://[email protected]/sandbox/trunk/2to3/lib2to3 ........ r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line wrap with parens ........ r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line use parens ........ r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line things which use touch_import should be pre order ........ r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers. ........ r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line allow star_expr in testlist_gexp ........ r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line wrap long line ........ r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line cleanup ........ r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line when there's a None value and a traceback, don't call type with it #9661 ........ r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code. ........ r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line fix urllib fixer with multiple as imports on a line #10069 ........ r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line stop abusing docstrings ........ r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line kill sibling import ........ r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line remove trailing whitespace ........ r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line typo ........
1 parent 92f60ed commit f37eb3a

59 files changed

Lines changed: 726 additions & 50 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Lib/lib2to3/Grammar.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' |
128128
'`' testlist1 '`' |
129129
NAME | NUMBER | STRING+ | '.' '.' '.')
130130
listmaker: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
131-
testlist_gexp: test ( comp_for | (',' (test|star_expr))* [','] )
131+
testlist_gexp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
132132
lambdef: 'lambda' [varargslist] ':' test
133133
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
134134
subscriptlist: subscript (',' subscript)* [',']

Lib/lib2to3/btm_matcher.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""A bottom-up tree matching algorithm implementation meant to speed
2+
up 2to3's matching process. After the tree patterns are reduced to
3+
their rarest linear path, a linear Aho-Corasick automaton is
4+
created. The linear automaton traverses the linear paths from the
5+
leaves to the root of the AST and returns a set of nodes for further
6+
matching. This reduces significantly the number of candidate nodes."""
7+
8+
__author__ = "George Boutsioukis <[email protected]>"
9+
10+
import logging
11+
import itertools
12+
from collections import defaultdict
13+
14+
from . import pytree
15+
from .btm_utils import reduce_tree
16+
17+
class BMNode(object):
18+
"""Class for a node of the Aho-Corasick automaton used in matching"""
19+
count = itertools.count()
20+
def __init__(self):
21+
self.transition_table = {}
22+
self.fixers = []
23+
self.id = next(BMNode.count)
24+
self.content = ''
25+
26+
class BottomMatcher(object):
27+
"""The main matcher class. After instantiating the patterns should
28+
be added using the add_fixer method"""
29+
30+
def __init__(self):
31+
self.match = set()
32+
self.root = BMNode()
33+
self.nodes = [self.root]
34+
self.fixers = []
35+
self.logger = logging.getLogger("RefactoringTool")
36+
37+
def add_fixer(self, fixer):
38+
"""Reduces a fixer's pattern tree to a linear path and adds it
39+
to the matcher(a common Aho-Corasick automaton). The fixer is
40+
appended on the matching states and called when they are
41+
reached"""
42+
self.fixers.append(fixer)
43+
tree = reduce_tree(fixer.pattern_tree)
44+
linear = tree.get_linear_subpattern()
45+
match_nodes = self.add(linear, start=self.root)
46+
for match_node in match_nodes:
47+
match_node.fixers.append(fixer)
48+
49+
def add(self, pattern, start):
50+
"Recursively adds a linear pattern to the AC automaton"
51+
#print("adding pattern", pattern, "to", start)
52+
if not pattern:
53+
#print("empty pattern")
54+
return [start]
55+
if isinstance(pattern[0], tuple):
56+
#alternatives
57+
#print("alternatives")
58+
match_nodes = []
59+
for alternative in pattern[0]:
60+
#add all alternatives, and add the rest of the pattern
61+
#to each end node
62+
end_nodes = self.add(alternative, start=start)
63+
for end in end_nodes:
64+
match_nodes.extend(self.add(pattern[1:], end))
65+
return match_nodes
66+
else:
67+
#single token
68+
#not last
69+
if pattern[0] not in start.transition_table:
70+
#transition did not exist, create new
71+
next_node = BMNode()
72+
start.transition_table[pattern[0]] = next_node
73+
else:
74+
#transition exists already, follow
75+
next_node = start.transition_table[pattern[0]]
76+
77+
if pattern[1:]:
78+
end_nodes = self.add(pattern[1:], start=next_node)
79+
else:
80+
end_nodes = [next_node]
81+
return end_nodes
82+
83+
def run(self, leaves):
84+
"""The main interface with the bottom matcher. The tree is
85+
traversed from the bottom using the constructed
86+
automaton. Nodes are only checked once as the tree is
87+
retraversed. When the automaton fails, we give it one more
88+
shot(in case the above tree matches as a whole with the
89+
rejected leaf), then we break for the next leaf. There is the
90+
special case of multiple arguments(see code comments) where we
91+
recheck the nodes
92+
93+
Args:
94+
The leaves of the AST tree to be matched
95+
96+
Returns:
97+
A dictionary of node matches with fixers as the keys
98+
"""
99+
current_ac_node = self.root
100+
results = defaultdict(list)
101+
for leaf in leaves:
102+
current_ast_node = leaf
103+
while current_ast_node:
104+
current_ast_node.was_checked = True
105+
for child in current_ast_node.children:
106+
# multiple statements, recheck
107+
if isinstance(child, pytree.Leaf) and child.value == ";":
108+
current_ast_node.was_checked = False
109+
break
110+
if current_ast_node.type == 1:
111+
#name
112+
node_token = current_ast_node.value
113+
else:
114+
node_token = current_ast_node.type
115+
116+
if node_token in current_ac_node.transition_table:
117+
#token matches
118+
current_ac_node = current_ac_node.transition_table[node_token]
119+
for fixer in current_ac_node.fixers:
120+
if not fixer in results:
121+
results[fixer] = []
122+
results[fixer].append(current_ast_node)
123+
124+
else:
125+
#matching failed, reset automaton
126+
current_ac_node = self.root
127+
if (current_ast_node.parent is not None
128+
and current_ast_node.parent.was_checked):
129+
#the rest of the tree upwards has been checked, next leaf
130+
break
131+
132+
#recheck the rejected node once from the root
133+
if node_token in current_ac_node.transition_table:
134+
#token matches
135+
current_ac_node = current_ac_node.transition_table[node_token]
136+
for fixer in current_ac_node.fixers:
137+
if not fixer in results.keys():
138+
results[fixer] = []
139+
results[fixer].append(current_ast_node)
140+
141+
current_ast_node = current_ast_node.parent
142+
return results
143+
144+
def print_ac(self):
145+
"Prints a graphviz diagram of the BM automaton(for debugging)"
146+
print("digraph g{")
147+
def print_node(node):
148+
for subnode_key in node.transition_table.keys():
149+
subnode = node.transition_table[subnode_key]
150+
print("%d -> %d [label=%s] //%s" %
151+
(node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
152+
if subnode_key == 1:
153+
print(subnode.content)
154+
print_node(subnode)
155+
print_node(self.root)
156+
print("}")
157+
158+
# taken from pytree.py for debugging; only used by print_ac
159+
_type_reprs = {}
160+
def type_repr(type_num):
161+
global _type_reprs
162+
if not _type_reprs:
163+
from .pygram import python_symbols
164+
# printing tokens is possible but not as useful
165+
# from .pgen2 import token // token.__dict__.items():
166+
for name, val in python_symbols.__dict__.items():
167+
if type(val) == int: _type_reprs[val] = name
168+
return _type_reprs.setdefault(type_num, type_num)

0 commit comments

Comments
 (0)