From 2aa81a8bc832549dcfaa4b5d8ccf8347b774c883 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Sat, 31 Oct 2020 19:01:22 +0200 Subject: [PATCH 1/4] bpo-42218: Correctly handle errors in left-recursive rules Left-recursive rules need to check for errors explicitly, since even if the rule returns NULL, the parsing might continue and lead to long-distance failures. --- Lib/test/test_syntax.py | 5 +++++ Parser/parser.c | 18 ++++++++++++++++++ Tools/peg_generator/pegen/c_generator.py | 3 +++ 3 files changed, 26 insertions(+) diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index e89d9401f2c397..f6c492015d0f4b 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -972,6 +972,11 @@ def func2(): """ self._check_error(code, "invalid syntax") + def test_invalid_line_continuation_left_recursive(self): + # Check bpo-42218: SyntaxErrors following left-recursive rules + # (t_primary_raw in this case) need to be tested explicitly + self._check_error('A.\u018a\\ ', "unexpected character after line continuation character") + def test_main(): support.run_unittest(SyntaxTestCase) from test import test_syntax diff --git a/Parser/parser.c b/Parser/parser.c index a882a81344cc61..48ebfe65aedafa 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -3461,6 +3461,8 @@ dotted_name_rule(Parser *p) } p->mark = _mark; void *_raw = dotted_name_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9045,6 +9047,8 @@ bitwise_or_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_or_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9159,6 +9163,8 @@ bitwise_xor_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_xor_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9273,6 +9279,8 @@ bitwise_and_rule(Parser *p) } p->mark = _mark; void *_raw = bitwise_and_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9387,6 +9395,8 @@ shift_expr_rule(Parser *p) } p->mark = _mark; void *_raw = shift_expr_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9540,6 +9550,8 @@ sum_rule(Parser *p) } p->mark = _mark; void *_raw = sum_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -9699,6 +9711,8 @@ term_rule(Parser *p) } p->mark = _mark; void *_raw = term_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -10303,6 +10317,8 @@ primary_rule(Parser *p) } p->mark = _mark; void *_raw = primary_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; @@ -13943,6 +13959,8 @@ t_primary_rule(Parser *p) } p->mark = _mark; void *_raw = t_primary_raw(p); + if (p->error_indicator) + return NULL; if (_raw == NULL || p->mark <= _resmark) break; _resmark = p->mark; diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 52bdb844e6bdd6..6af0d3f7a2a14d 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -502,6 +502,9 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: ) self.print("p->mark = _mark;") self.print(f"void *_raw = {node.name}_raw(p);") + self.print("if (p->error_indicator)") + with self.indent(): + self.print("return NULL;") self.print("if (_raw == NULL || p->mark <= _resmark)") with self.indent(): self.print("break;") From 7fe6209f9468765ab0e8768a031a11ef241d373c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 31 Oct 2020 17:50:26 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst new file mode 100644 index 00000000000000..66a940ee5fd760 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst @@ -0,0 +1,2 @@ +Check for errors in left-recursive parser rules to avoid cases where such errors do not +get handled in time and appear as long-distance bugs in other places. \ No newline at end of file From 53ed0f46877af95e93f7b3852f7c0746cc2c3e38 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Sat, 31 Oct 2020 19:53:16 +0200 Subject: [PATCH 3/4] Update Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst Co-authored-by: Pablo Galindo --- .../2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst index 66a940ee5fd760..a38a310e4b45b8 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-31-17-50-23.bpo-42218.Dp_Z3v.rst @@ -1,2 +1,3 @@ -Check for errors in left-recursive parser rules to avoid cases where such errors do not -get handled in time and appear as long-distance bugs in other places. \ No newline at end of file +Fixed a bug in the PEG parser that was causing crashes in debug mode. Now errors are checked +in left-recursive rules to avoid cases where such errors do not get handled in time and appear +as long-distance crashes in other places. From 9f431b2a4cd523552113bcce0e14703bbf0a7f98 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Sat, 31 Oct 2020 20:01:06 +0200 Subject: [PATCH 4/4] Add one more test case --- Lib/test/test_syntax.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index f6c492015d0f4b..91ca1db43a74f2 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -975,7 +975,10 @@ def func2(): def test_invalid_line_continuation_left_recursive(self): # Check bpo-42218: SyntaxErrors following left-recursive rules # (t_primary_raw in this case) need to be tested explicitly - self._check_error('A.\u018a\\ ', "unexpected character after line continuation character") + self._check_error("A.\u018a\\ ", + "unexpected character after line continuation character") + self._check_error("A.\u03bc\\\n", + "unexpected EOF while parsing") def test_main(): support.run_unittest(SyntaxTestCase)