From 10bdaa8e0f8541cb16c505e89a03fa9a6a128786 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Wed, 20 Jan 2021 20:12:51 +0300 Subject: [PATCH 1/5] bpo-40176: point to string opening on EOF/EOL --- Include/errcode.h | 2 -- Lib/test/test_eof.py | 24 +++++++++++++----------- Lib/test/test_exceptions.py | 2 +- Lib/test/test_fstring.py | 2 +- Parser/pegen.c | 6 ------ Parser/tokenizer.c | 17 +++++++---------- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/Include/errcode.h b/Include/errcode.h index 790518b8b7730e..f2671d6c9b30b4 100644 --- a/Include/errcode.h +++ b/Include/errcode.h @@ -26,8 +26,6 @@ extern "C" { #define E_TOODEEP 20 /* Too many indentation levels */ #define E_DEDENT 21 /* No matching outer block for dedent */ #define E_DECODE 22 /* Error in decoding into Unicode */ -#define E_EOFS 23 /* EOF in triple-quoted string */ -#define E_EOLS 24 /* EOL in single-quoted string */ #define E_LINECONT 25 /* Unexpected characters after a line continuation */ #define E_BADSINGLE 27 /* Ill-formed single statement input */ diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py index 2cf263d27463c4..7dae131ac62bdb 100644 --- a/Lib/test/test_eof.py +++ b/Lib/test/test_eof.py @@ -7,23 +7,25 @@ import unittest class EOFTestCase(unittest.TestCase): - def test_EOFC(self): - expect = "EOL while scanning string literal (<string>, line 1)" - try: - eval("""'this is a test\ - """) - except SyntaxError as msg: - self.assertEqual(str(msg), expect) - else: - raise support.TestFailed + def test_EOF_single_quote(self): + expect = "unterminated string literal (<string>, line 1)" + for quote in ("'", "\""): + try: + eval(f"""{quote}this is a test\ + """) + except SyntaxError as msg: + self.assertEqual(str(msg), expect) + self.assertEqual(msg.offset, 1) + else: + raise support.TestFailed def test_EOFS(self): - expect = ("EOF while scanning triple-quoted string literal " - "(<string>, line 1)") + expect = ("unterminated triple-quoted string literal (<string>, line 1)") try: eval("""'''this is a 
test""") except SyntaxError as msg: self.assertEqual(str(msg), expect) + self.assertEqual(msg.offset, 1) else: raise support.TestFailed diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index eb70d7b4e49724..7a8b64cba21acc 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -206,7 +206,7 @@ def testSyntaxErrorOffset(self): check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +', 2, 19, encoding='cp1251') check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18) - check('x = "a', 1, 7) + check('x = "a', 1, 5) check('lambda x: x = 2', 1, 1) check('f{a + b + c}', 1, 2) check('[file for str(file) in []\n])', 1, 11) diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 2345832abce624..7ca1512ebbf1bf 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -661,7 +661,7 @@ def test_parens_in_expressions(self): ["f'{3)+(4}'", ]) - self.assertAllRaise(SyntaxError, 'EOL while scanning string literal', + self.assertAllRaise(SyntaxError, 'unterminated string literal', ["f'{\n}'", ]) diff --git a/Parser/pegen.c b/Parser/pegen.c index 0d39030ea6ed18..0e7f86bc99e451 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -327,12 +327,6 @@ tokenizer_error(Parser *p) case E_TOKEN: msg = "invalid token"; break; - case E_EOFS: - RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal"); - return -1; - case E_EOLS: - RAISE_SYNTAX_ERROR("EOL while scanning string literal"); - return -1; case E_EOF: if (p->tok->level) { raise_unclosed_parentheses_error(p); diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d3e846c0a5a126..68ae4e150ba12b 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1739,20 +1739,17 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); - if (c == EOF) { + if (c == EOF || (quote_size == 1 && c == '\n')) { + tok->cur = (char *)tok->start; + tok->cur++; + 
tok->line_start = tok->multi_line_start; if (quote_size == 3) { - tok->done = E_EOFS; + return syntaxerror(tok, + "unterminated triple-quoted string literal"); } else { - tok->done = E_EOLS; + return syntaxerror(tok, "unterminated string literal"); } - tok->cur = tok->inp; - return ERRORTOKEN; - } - if (quote_size == 1 && c == '\n') { - tok->done = E_EOLS; - tok->cur = tok->inp; - return ERRORTOKEN; } if (c == quote) { end_quote_size += 1; From fcecf4b1025885460f6e67b83f07edb846e21713 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Wed, 20 Jan 2021 22:29:16 +0300 Subject: [PATCH 2/5] revert the lineno pointer back to the string start --- Lib/test/test_exceptions.py | 2 +- Parser/tokenizer.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 7a8b64cba21acc..21878c39f4fec9 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -238,7 +238,7 @@ def bar(): def baz(): '''quux''' - """, 9, 20) + """, 9, 24) check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4) check("(1+)", 1, 4) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 68ae4e150ba12b..d4b67fbb5583dc 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1743,6 +1743,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) tok->cur = (char *)tok->start; tok->cur++; tok->line_start = tok->multi_line_start; + tok->lineno = tok->first_lineno; if (quote_size == 3) { return syntaxerror(tok, "unterminated triple-quoted string literal"); From b52364e468e31a2578f44e3df780173ef227225b Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Wed, 20 Jan 2021 22:31:09 +0300 Subject: [PATCH 3/5] add blurb entry --- .../Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst diff --git a/Misc/NEWS.d/next/Core and 
Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst new file mode 100644 index 00000000000000..4cc99582a41274 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst @@ -0,0 +1,2 @@ +Syntax errors for unterminated string literals now points to the start, +instead of the EOF/EOL. Patch by Batuhan Taskaya. From 6a5f7b1d10f81db849e6a8fb1a6c09af365ed01a Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Wed, 20 Jan 2021 23:42:37 +0300 Subject: [PATCH 4/5] add (detected at line blahblah) --- Lib/test/test_eof.py | 4 ++-- Parser/tokenizer.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_eof.py b/Lib/test/test_eof.py index 7dae131ac62bdb..b370e27161cee6 100644 --- a/Lib/test/test_eof.py +++ b/Lib/test/test_eof.py @@ -8,7 +8,7 @@ class EOFTestCase(unittest.TestCase): def test_EOF_single_quote(self): - expect = "unterminated string literal (<string>, line 1)" + expect = "unterminated string literal (detected at line 1) (<string>, line 1)" for quote in ("'", "\""): try: eval(f"""{quote}this is a test\ @@ -20,7 +20,7 @@ def test_EOF_single_quote(self): raise support.TestFailed def test_EOFS(self): - expect = ("unterminated triple-quoted string literal (<string>, line 1)") + expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)") try: eval("""'''this is a test""") except SyntaxError as msg: diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d4b67fbb5583dc..d9334aaf148ba2 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1740,16 +1740,24 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) while (end_quote_size != quote_size) { c = tok_nextc(tok); if (c == EOF || (quote_size == 1 && c == '\n')) { + // shift the tok_state's location into + // the start of string, and report the error + // from the initial quote character tok->cur = (char *)tok->start; tok->cur++; 
tok->line_start = tok->multi_line_start; + int start = tok->lineno; tok->lineno = tok->first_lineno; + if (quote_size == 3) { return syntaxerror(tok, - "unterminated triple-quoted string literal"); + "unterminated triple-quoted string literal" + " (detected at line %d)", start); } else { - return syntaxerror(tok, "unterminated string literal"); + return syntaxerror(tok, + "unterminated string literal (detected at" + " line %d)", start); } } if (c == quote) { From ca51ef85156ef51b8f2bcc7b160a31cd9cf4a1c9 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Thu, 21 Jan 2021 00:11:50 +0300 Subject: [PATCH 5/5] fix typos --- .../2021-01-20-22-31-01.bpo-40176.anjyWw.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst index 4cc99582a41274..df7de3bdf37bc2 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-01-20-22-31-01.bpo-40176.anjyWw.rst @@ -1,2 +1,2 @@ -Syntax errors for unterminated string literals now points to the start, -instead of the EOF/EOL. Patch by Batuhan Taskaya. +Syntax errors for unterminated string literals now point to the start +of the string instead of reporting EOF/EOL.