Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 05692c6

Browse files
authored
gh-96611: Fix error message for invalid UTF-8 in mid-multiline string (#96623)
1 parent 6744490 commit 05692c6

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

Lib/test/test_source_encoding.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,18 @@ def test_error_from_string(self):
147147
self.assertTrue(c.exception.args[0].startswith(expected),
148148
msg=c.exception.args[0])
149149

150+
def test_file_parse_error_multiline(self):
151+
# gh96611:
152+
with open(TESTFN, "wb") as fd:
153+
fd.write(b'print("""\n\xb1""")\n')
154+
155+
try:
156+
retcode, stdout, stderr = script_helper.assert_python_failure(TESTFN)
157+
158+
self.assertGreater(retcode, 0)
159+
self.assertIn(b"Non-UTF-8 code starting with '\\xb1'", stderr)
160+
finally:
161+
os.unlink(TESTFN)
150162

151163
class AbstractSourceEncodingTest:
152164

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
When loading a file with invalid UTF-8 inside a multi-line string, a correct
2+
SyntaxError is emitted.

Parser/tokenizer.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1936,6 +1936,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19361936
/* Get rest of string */
19371937
while (end_quote_size != quote_size) {
19381938
c = tok_nextc(tok);
1939+
if (tok->done == E_DECODE)
1940+
break;
19391941
if (c == EOF || (quote_size == 1 && c == '\n')) {
19401942
assert(tok->multi_line_start != NULL);
19411943
// shift the tok_state's location into

0 commit comments

Comments
 (0)