From fa7a709a44c322c77877d3a92550dfaa87a409e7 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 3 May 2023 17:02:34 +0200 Subject: [PATCH 1/2] gh-97556: Raise null bytes syntax error upon null in multiline string --- Lib/test/test_cmd_line_script.py | 12 ++++++++++++ Parser/tokenizer.c | 6 +++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index d98e23855e0c19..0e01f36bfc3369 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -669,6 +669,18 @@ def test_syntaxerror_null_bytes(self): ], ) + def test_syntaxerror_null_bytes_in_multiline_string(self): + script = "\n'''\nmultilinestring\0\n'''" + with os_helper.temp_dir() as script_dir: + script_name = _make_test_script(script_dir, 'script', script) + _, _, stderr = assert_python_failure(script_name) + self.assertEqual( + stderr.splitlines()[-2:], + [ b" multilinestring", + b'SyntaxError: source code cannot contain null bytes' + ] + ) + def test_consistent_sys_path_for_direct_execution(self): # This test case ensures that the following all give the same # sys.path configuration: diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d2f9fee110ebf5..09bb5f72dd4f64 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2301,8 +2301,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Get rest of string */ while (end_quote_size != quote_size) { c = tok_nextc(tok); - if (tok->done == E_DECODE) + if (tok->done == E_ERROR) { + return MAKE_TOKEN(ERRORTOKEN); + } + if (tok->done == E_DECODE) { break; + } if (c == EOF || (quote_size == 1 && c == '\n')) { assert(tok->multi_line_start != NULL); // shift the tok_state's location into From 78fbd3ca6185d3483fab191fc8c9be55b093ba0c Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 3 May 2023 17:43:49 +0200 Subject: [PATCH 2/2] Handle f-strings correctly as well --- Lib/test/test_cmd_line_script.py | 19 ++++++++++--------- Parser/tokenizer.c | 3 +++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 0e01f36bfc3369..8bf299382e9ca4 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -670,16 +670,17 @@ def test_syntaxerror_null_bytes(self): ) def test_syntaxerror_null_bytes_in_multiline_string(self): - script = "\n'''\nmultilinestring\0\n'''" + scripts = ["\n'''\nmultilinestring\0\n'''", "\nf'''\nmultilinestring\0\n'''"] # Both normal and f-strings with os_helper.temp_dir() as script_dir: - script_name = _make_test_script(script_dir, 'script', script) - _, _, stderr = assert_python_failure(script_name) - self.assertEqual( - stderr.splitlines()[-2:], - [ b" multilinestring", - b'SyntaxError: source code cannot contain null bytes' - ] - ) + for script in scripts: + script_name = _make_test_script(script_dir, 'script', script) + _, _, stderr = assert_python_failure(script_name) + self.assertEqual( + stderr.splitlines()[-2:], + [ b" multilinestring", + b'SyntaxError: source code cannot contain null bytes' + ] + ) def test_consistent_sys_path_for_direct_execution(self): # This test case ensures that the following all give the same diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 09bb5f72dd4f64..7c07d2011fda61 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2558,6 +2558,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct while (end_quote_size != current_tok->f_string_quote_size) { int c = tok_nextc(tok); + if (tok->done == E_ERROR) { + return MAKE_TOKEN(ERRORTOKEN); + } if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) { if (tok->decoding_erred) { return MAKE_TOKEN(ERRORTOKEN);