From fa9d70820466634910d89075ab7f2d7cc6144804 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 29 Apr 2020 02:02:34 +0300 Subject: [PATCH 1/5] bpo-40334: Disallow invalid single statements in the new parser After parsing is done, a single statement has to be checked for additional lines and a `SyntaxError` must be raised, in case there are any. Closes we-like-parsers/cpython#99. --- Lib/test/test_compile.py | 1 - Parser/pegen/pegen.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index a507ac09149189..566ca27fca893d 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -501,7 +501,6 @@ def test_single_statement(self): self.compile_single("if x:\n f(x)\nelse:\n g(x)") self.compile_single("class T:\n pass") - @support.skip_if_new_parser('Pegen does not disallow multiline single stmts') def test_bad_single_statement(self): self.assertInvalidSingle('1\n2') self.assertInvalidSingle('def f(): pass') diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index 6f78d8c86520eb..d789073f5a8cbf 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -911,6 +911,32 @@ _PyPegen_number_token(Parser *p) p->arena); } +static int // bool +bad_single_statement(Parser *p) +{ + const char *cur = strchr(p->tok->buf, '\n'); + if (!cur) { + return 0; + } + char c = *cur; + + for (;;) { + while (c == ' ' || c == '\t' || c == '\n' || c == '\014') + c = *++cur; + + if (!c) + return 0; + + if (c != '#') { + return 1; + } + + /* Suck up comment. */ + while (c && c != '\n') + c = *++cur; + } +} + void _PyPegen_Parser_Free(Parser *p) { @@ -1014,6 +1040,11 @@ _PyPegen_run_parser(Parser *p) return NULL; } + if (p->start_rule == Py_single_input && bad_single_statement(p)) { + p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future + return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement"); + } + return res; } From a9a60a718230137a2ef821bc46d1eb2738a1a355 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 29 Apr 2020 02:59:49 +0300 Subject: [PATCH 2/5] Check if newline is allowed (preceded by linecont character or appears inside a string) --- Parser/pegen/pegen.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index d789073f5a8cbf..5448b67a6b2f08 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -911,11 +911,25 @@ _PyPegen_number_token(Parser *p) p->arena); } +static int // bool +newline_in_string(Parser *p, const char *cur) +{ + for (char c = *cur; cur >= p->tok->buf; c = *--cur) { + if (c == '\'' || c == '"') { + return 1; + } + } + return 0; +} + static int // bool bad_single_statement(Parser *p) { const char *cur = strchr(p->tok->buf, '\n'); - if (!cur) { + + /* Newlines are allowed if preceded by a line continuation character + or if they appear inside a string. */ + if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) { return 0; } char c = *cur; From 4f11574957f576d5daca17b7e2b1daed9d41a19c Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 29 Apr 2020 03:55:03 +0300 Subject: [PATCH 3/5] PEP 7 --- Parser/pegen/pegen.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index 5448b67a6b2f08..3f32a1e573d816 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -938,16 +938,18 @@ bad_single_statement(Parser *p) while (c == ' ' || c == '\t' || c == '\n' || c == '\014') c = *++cur; - if (!c) + if (!c) { return 0; + } if (c != '#') { return 1; } /* Suck up comment. */ - while (c && c != '\n') + while (c && c != '\n') { c = *++cur; + } } } From 6cbb27f26ff680e198f95e932079e89f1a5ce8c0 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 29 Apr 2020 04:14:29 +0300 Subject: [PATCH 4/5] More PEP 7 Co-Authored-By: Pablo Galindo --- Parser/pegen/pegen.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index 3f32a1e573d816..e01a4706772f28 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -935,8 +935,9 @@ bad_single_statement(Parser *p) char c = *cur; for (;;) { - while (c == ' ' || c == '\t' || c == '\n' || c == '\014') + while (c == ' ' || c == '\t' || c == '\n' || c == '\014') { c = *++cur; + } if (!c) { return 0; From baa16b8733a642054bb7c0d5b9865c5e53e99fd9 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 29 Apr 2020 04:17:50 +0300 Subject: [PATCH 5/5] Add comment --- Parser/pegen/pegen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index e01a4706772f28..ecb74d96b23a88 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -922,6 +922,9 @@ newline_in_string(Parser *p, const char *cur) return 0; } +/* Check that the source for a single input statement really is a single + statement by looking at what is left in the buffer after parsing. + Trailing whitespace and comments are OK. */ static int // bool bad_single_statement(Parser *p) {