Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d73aca7

Browse files
committed
do not call into python api if an exception is set (#24022)
1 parent 6de708f commit d73aca7

3 files changed

Lines changed: 18 additions & 5 deletions

File tree

Lib/test/test_compile.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import math
2+
import os
23
import unittest
34
import sys
45
import _ast
6+
import tempfile
57
import types
6-
from test import support
8+
from test import support, script_helper
79

810
class TestSpecifics(unittest.TestCase):
911

@@ -492,6 +494,16 @@ def test_bad_single_statement(self):
492494
self.assertInvalidSingle('f()\nxy # blah\nblah()')
493495
self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
494496

497+
def test_particularly_evil_undecodable(self):
498+
# Issue 24022
499+
src = b'0000\x00\n00000000000\n\x00\n\x9e\n'
500+
with tempfile.TemporaryDirectory() as tmpd:
501+
fn = os.path.join(tmpd, "bad.py")
502+
with open(fn, "wb") as fp:
503+
fp.write(src)
504+
res = script_helper.run_python_until_end(fn)[0]
505+
self.assertIn(b"Non-UTF-8", res.err)
506+
495507
@support.cpython_only
496508
def test_compiler_recursion_limit(self):
497509
# Expected limit is sys.getrecursionlimit() * the scaling factor

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ Release date: tba
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #24022: Fix tokenizer crash when processing undecodable source code.
14+
1315
- Issue #23309: Avoid a deadlock at shutdown if a daemon thread is aborted
1416
while it is holding a lock to a buffered I/O object, and the main thread
1517
tries to use the same I/O object (typically stdout or stderr). A fatal

Parser/tokenizer.c

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1301,6 +1301,8 @@ verify_identifier(struct tok_state *tok)
13011301
{
13021302
PyObject *s;
13031303
int result;
1304+
if (tok->decoding_erred)
1305+
return 0;
13041306
s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
13051307
if (s == NULL || PyUnicode_READY(s) == -1) {
13061308
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@@ -1469,11 +1471,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
14691471
c = tok_nextc(tok);
14701472
}
14711473
tok_backup(tok, c);
1472-
if (nonascii &&
1473-
!verify_identifier(tok)) {
1474-
tok->done = E_IDENTIFIER;
1474+
if (nonascii && !verify_identifier(tok))
14751475
return ERRORTOKEN;
1476-
}
14771476
*p_start = tok->start;
14781477
*p_end = tok->cur;
14791478
return NAME;

0 commit comments

Comments
 (0)