Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 3e73152

Browse files
authored
Speed up new backtracking parser (psf#2728)
1 parent 521d1b8 commit 3e73152

4 files changed

Lines changed: 176 additions & 23 deletions

File tree

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
at least one pre-existing blank line (#2736)
2525
- Verbose mode also now describes how a project root was discovered and which paths will
2626
be formatted. (#2526)
27+
- Speed up the new backtracking parser by about 4X in general (enabled when
28+
`--target-version` is set to 3.10 and higher). (#2728)
2729

2830
### Packaging
2931

src/blib2to3/pgen2/parse.py

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ def lam_sub(grammar: Grammar, node: RawNode) -> NL:
4646
return Node(type=node[0], children=node[3], context=node[2])
4747

4848

49+
# A placeholder node, used when parser is backtracking.
50+
DUMMY_NODE = (-1, None, None, None)
51+
52+
53+
def stack_copy(
54+
stack: List[Tuple[DFAS, int, RawNode]]
55+
) -> List[Tuple[DFAS, int, RawNode]]:
56+
"""Nodeless stack copy."""
57+
return [(copy.deepcopy(dfa), label, DUMMY_NODE) for dfa, label, _ in stack]
58+
59+
4960
class Recorder:
5061
def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
5162
self.parser = parser
@@ -54,21 +65,40 @@ def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> No
5465

5566
self._dead_ilabels: Set[int] = set()
5667
self._start_point = self.parser.stack
57-
self._points = {ilabel: copy.deepcopy(self._start_point) for ilabel in ilabels}
68+
self._points = {ilabel: stack_copy(self._start_point) for ilabel in ilabels}
5869

5970
@property
6071
def ilabels(self) -> Set[int]:
6172
return self._dead_ilabels.symmetric_difference(self._ilabels)
6273

6374
@contextmanager
6475
def switch_to(self, ilabel: int) -> Iterator[None]:
65-
self.parser.stack = self._points[ilabel]
76+
with self.backtrack():
77+
self.parser.stack = self._points[ilabel]
78+
try:
79+
yield
80+
except ParseError:
81+
self._dead_ilabels.add(ilabel)
82+
finally:
83+
self.parser.stack = self._start_point
84+
85+
@contextmanager
86+
def backtrack(self) -> Iterator[None]:
87+
"""
88+
Use the node-level invariant ones for basic parsing operations (push/pop/shift).
89+
These still will operate on the stack; but they won't create any new nodes, or
90+
modify the contents of any other existing nodes.
91+
92+
This saves us a ton of time when we are backtracking, since we
93+
want to restore to the initial state as quickly as possible, which
94+
can only be done by performing as few mutations as possible.
95+
"""
96+
is_backtracking = self.parser.is_backtracking
6697
try:
98+
self.parser.is_backtracking = True
6799
yield
68-
except ParseError:
69-
self._dead_ilabels.add(ilabel)
70100
finally:
71-
self.parser.stack = self._start_point
101+
self.parser.is_backtracking = is_backtracking
72102

73103
def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None:
74104
func: Callable[..., Any]
@@ -179,6 +209,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
179209
self.grammar = grammar
180210
# See note in docstring above. TL;DR this is ignored.
181211
self.convert = convert or lam_sub
212+
self.is_backtracking = False
182213

183214
def setup(self, proxy: "TokenProxy", start: Optional[int] = None) -> None:
184215
"""Prepare for parsing.
@@ -319,28 +350,40 @@ def classify(self, type: int, value: Text, context: Context) -> List[int]:
319350

320351
def shift(self, type: int, value: Text, newstate: int, context: Context) -> None:
321352
"""Shift a token. (Internal)"""
322-
dfa, state, node = self.stack[-1]
323-
rawnode: RawNode = (type, value, context, None)
324-
newnode = convert(self.grammar, rawnode)
325-
assert node[-1] is not None
326-
node[-1].append(newnode)
327-
self.stack[-1] = (dfa, newstate, node)
353+
if self.is_backtracking:
354+
dfa, state, _ = self.stack[-1]
355+
self.stack[-1] = (dfa, newstate, DUMMY_NODE)
356+
else:
357+
dfa, state, node = self.stack[-1]
358+
rawnode: RawNode = (type, value, context, None)
359+
newnode = convert(self.grammar, rawnode)
360+
assert node[-1] is not None
361+
node[-1].append(newnode)
362+
self.stack[-1] = (dfa, newstate, node)
328363

329364
def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None:
330365
"""Push a nonterminal. (Internal)"""
331-
dfa, state, node = self.stack[-1]
332-
newnode: RawNode = (type, None, context, [])
333-
self.stack[-1] = (dfa, newstate, node)
334-
self.stack.append((newdfa, 0, newnode))
366+
if self.is_backtracking:
367+
dfa, state, _ = self.stack[-1]
368+
self.stack[-1] = (dfa, newstate, DUMMY_NODE)
369+
self.stack.append((newdfa, 0, DUMMY_NODE))
370+
else:
371+
dfa, state, node = self.stack[-1]
372+
newnode: RawNode = (type, None, context, [])
373+
self.stack[-1] = (dfa, newstate, node)
374+
self.stack.append((newdfa, 0, newnode))
335375

336376
def pop(self) -> None:
337377
"""Pop a nonterminal. (Internal)"""
338-
popdfa, popstate, popnode = self.stack.pop()
339-
newnode = convert(self.grammar, popnode)
340-
if self.stack:
341-
dfa, state, node = self.stack[-1]
342-
assert node[-1] is not None
343-
node[-1].append(newnode)
378+
if self.is_backtracking:
379+
self.stack.pop()
344380
else:
345-
self.rootnode = newnode
346-
self.rootnode.used_names = self.used_names
381+
popdfa, popstate, popnode = self.stack.pop()
382+
newnode = convert(self.grammar, popnode)
383+
if self.stack:
384+
dfa, state, node = self.stack[-1]
385+
assert node[-1] is not None
386+
node[-1].append(newnode)
387+
else:
388+
self.rootnode = newnode
389+
self.rootnode.used_names = self.used_names
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
re.match()
2+
match = a
3+
with match() as match:
4+
match = f"{match}"
5+
6+
re.match()
7+
match = a
8+
with match() as match:
9+
match = f"{match}"
10+
11+
12+
def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:
13+
if not target_versions:
14+
# No target_version specified, so try all grammars.
15+
return [
16+
# Python 3.7+
17+
pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords,
18+
# Python 3.0-3.6
19+
pygram.python_grammar_no_print_statement_no_exec_statement,
20+
# Python 2.7 with future print_function import
21+
pygram.python_grammar_no_print_statement,
22+
# Python 2.7
23+
pygram.python_grammar,
24+
]
25+
26+
match match:
27+
case case:
28+
match match:
29+
case case:
30+
pass
31+
32+
if all(version.is_python2() for version in target_versions):
33+
# Python 2-only code, so try Python 2 grammars.
34+
return [
35+
# Python 2.7 with future print_function import
36+
pygram.python_grammar_no_print_statement,
37+
# Python 2.7
38+
pygram.python_grammar,
39+
]
40+
41+
re.match()
42+
match = a
43+
with match() as match:
44+
match = f"{match}"
45+
46+
def test_patma_139(self):
47+
x = False
48+
match x:
49+
case bool(z):
50+
y = 0
51+
self.assertIs(x, False)
52+
self.assertEqual(y, 0)
53+
self.assertIs(z, x)
54+
55+
# Python 3-compatible code, so only try Python 3 grammar.
56+
grammars = []
57+
if supports_feature(target_versions, Feature.PATTERN_MATCHING):
58+
# Python 3.10+
59+
grammars.append(pygram.python_grammar_soft_keywords)
60+
# If we have to parse both, try to parse async as a keyword first
61+
if not supports_feature(
62+
target_versions, Feature.ASYNC_IDENTIFIERS
63+
) and not supports_feature(target_versions, Feature.PATTERN_MATCHING):
64+
# Python 3.7-3.9
65+
grammars.append(
66+
pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords
67+
)
68+
if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS):
69+
# Python 3.0-3.6
70+
grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement)
71+
72+
def test_patma_155(self):
73+
x = 0
74+
y = None
75+
match x:
76+
case 1e1000:
77+
y = 0
78+
self.assertEqual(x, 0)
79+
self.assertIs(y, None)
80+
81+
x = range(3)
82+
match x:
83+
case [y, case as x, z]:
84+
w = 0
85+
86+
# At least one of the above branches must have been taken, because every Python
87+
# version has exactly one of the two 'ASYNC_*' flags
88+
return grammars
89+
90+
91+
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
92+
"""Given a string with source, return the lib2to3 Node."""
93+
if not src_txt.endswith("\n"):
94+
src_txt += "\n"
95+
96+
grammars = get_grammars(set(target_versions))
97+
98+
99+
re.match()
100+
match = a
101+
with match() as match:
102+
match = f"{match}"
103+
104+
re.match()
105+
match = a
106+
with match() as match:
107+
match = f"{match}"

tests/test_format.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"pattern_matching_complex",
7070
"pattern_matching_extras",
7171
"pattern_matching_style",
72+
"pattern_matching_generic",
7273
"parenthesized_context_managers",
7374
]
7475

0 commit comments

Comments
 (0)