From 4fb56c99ab43afd98cb61adbebb25bb817520789 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Thu, 22 Dec 2022 18:50:14 -0600 Subject: [PATCH 1/2] gh-100445: Improve error message for unterminated strings with escapes --- Lib/test/test_syntax.py | 10 ++++++++-- .../2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst | 1 + Parser/tokenizer.c | 13 +++++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 4c988382f8b411..cef88d9759f75b 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2262,8 +2262,14 @@ def test_error_parenthesis(self): def test_error_string_literal(self): - self._check_error("'blech", "unterminated string literal") - self._check_error('"blech', "unterminated string literal") + self._check_error("'blech", r"unterminated string literal \(.*\)$") + self._check_error('"blech', r"unterminated string literal \(.*\)$") + self._check_error( + r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote" + ) + self._check_error( + r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote" + ) self._check_error("'''blech", "unterminated triple-quoted string literal") self._check_error('"""blech', "unterminated triple-quoted string literal") diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst new file mode 100644 index 00000000000000..72f38849df9b82 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-27-02-51-45.gh-issue-100445.C8f6ph.rst @@ -0,0 +1 @@ +Improve error message for unterminated strings with escapes. 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 46b7159ff0516b..ef686ae58cbe3b 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2397,6 +2397,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t int quote = c; int quote_size = 1; /* 1 or 3 */ int end_quote_size = 0; + int has_escaped_quote = 0; /* Nodes of type STRING, especially multi line strings must be handled differently in order to get both @@ -2462,8 +2463,13 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(ERRORTOKEN); } else { - syntaxerror(tok, "unterminated string literal (detected at" - " line %d)", start); + if (has_escaped_quote) { + syntaxerror(tok, "unterminated string literal (detected at" + " line %d); perhaps you escaped the end quote?", start); + } else { + syntaxerror(tok, "unterminated string literal (detected at" + " line %d)", start); + } if (c != '\n') { tok->done = E_EOLS; } @@ -2477,6 +2483,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t end_quote_size = 0; if (c == '\\') { c = tok_nextc(tok); /* skip escaped char */ + if (c == quote) { /* but record whether the escaped char was a quote */ + has_escaped_quote = 1; + } if (c == '\r') { c = tok_nextc(tok); } From 23a2a1612b02890e999c7cfd91590c90dee5e826 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 17 Oct 2023 21:49:05 -0700 Subject: [PATCH 2/2] gh-100445: Rebase: apply the escaped-end-quote hint to Parser/lexer/lexer.c --- Parser/lexer/lexer.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 1a01bb0352a7b1..2ba24a2c2405f2 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t int quote = c; int quote_size = 1; /* 1 or 3 */ int end_quote_size = 0; + int has_escaped_quote = 0; /* Nodes of type STRING, especially multi line strings must be handled differently in order 
to get both @@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t return MAKE_TOKEN(ERRORTOKEN); } else { - _PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at" - " line %d)", start); + if (has_escaped_quote) { + _PyTokenizer_syntaxerror( + tok, + "unterminated string literal (detected at line %d); " + "perhaps you escaped the end quote?", + start + ); + } else { + _PyTokenizer_syntaxerror( + tok, "unterminated string literal (detected at line %d)", start + ); + } if (c != '\n') { tok->done = E_EOLS; } @@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t end_quote_size = 0; if (c == '\\') { c = tok_nextc(tok); /* skip escaped char */ + if (c == quote) { /* but record whether the escaped char was a quote */ + has_escaped_quote = 1; + } if (c == '\r') { c = tok_nextc(tok); }