Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0304729

Browse files
Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
2 parents 28a465c + 7e2b870 commit 0304729

3 files changed

Lines changed: 19 additions & 8 deletions

File tree

Lib/test/test_compile.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,16 @@ def test_particularly_evil_undecodable(self):
516516
res = script_helper.run_python_until_end(fn)[0]
517517
self.assertIn(b"Non-UTF-8", res.err)
518518

519+
def test_yet_more_evil_still_undecodable(self):
520+
# Issue #25388
521+
src = b"#\x00\n#\xfd\n"
522+
with tempfile.TemporaryDirectory() as tmpd:
523+
fn = os.path.join(tmpd, "bad.py")
524+
with open(fn, "wb") as fp:
525+
fp.write(src)
526+
res = script_helper.run_python_until_end(fn)[0]
527+
self.assertIn(b"Non-UTF-8", res.err)
528+
519529
@support.cpython_only
520530
def test_compiler_recursion_limit(self):
521531
# Expected limit is sys.getrecursionlimit() * the scaling factor

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ Release date: XXXX-XX-XX
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #25388: Fixed tokenizer crash when processing undecodable source code
14+
with a null byte.
15+
1316
- Issue #25462: The hash of the key now is calculated only once in most
1417
operations in C implementation of OrderedDict.
1518

Parser/tokenizer.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ error_ret(struct tok_state *tok) /* XXX */
196196
tok->decoding_erred = 1;
197197
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
198198
PyMem_FREE(tok->buf);
199-
tok->buf = NULL;
199+
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
200+
tok->done = E_DECODE;
200201
return NULL; /* as if it were EOF */
201202
}
202203

@@ -952,11 +953,6 @@ tok_nextc(struct tok_state *tok)
952953
}
953954
buflen = PyBytes_GET_SIZE(u);
954955
buf = PyBytes_AS_STRING(u);
955-
if (!buf) {
956-
Py_DECREF(u);
957-
tok->done = E_DECODE;
958-
return EOF;
959-
}
960956
newtok = PyMem_MALLOC(buflen+1);
961957
strcpy(newtok, buf);
962958
Py_DECREF(u);
@@ -998,7 +994,6 @@ tok_nextc(struct tok_state *tok)
998994
if (tok->buf != NULL)
999995
PyMem_FREE(tok->buf);
1000996
tok->buf = newtok;
1001-
tok->line_start = tok->buf;
1002997
tok->cur = tok->buf;
1003998
tok->line_start = tok->buf;
1004999
tok->inp = strchr(tok->buf, '\0');
@@ -1021,7 +1016,8 @@ tok_nextc(struct tok_state *tok)
10211016
}
10221017
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
10231018
tok) == NULL) {
1024-
tok->done = E_EOF;
1019+
if (!tok->decoding_erred)
1020+
tok->done = E_EOF;
10251021
done = 1;
10261022
}
10271023
else {
@@ -1055,6 +1051,8 @@ tok_nextc(struct tok_state *tok)
10551051
return EOF;
10561052
}
10571053
tok->buf = newbuf;
1054+
tok->cur = tok->buf + cur;
1055+
tok->line_start = tok->cur;
10581056
tok->inp = tok->buf + curvalid;
10591057
tok->end = tok->buf + newsize;
10601058
tok->start = curstart < 0 ? NULL :

0 commit comments

Comments
 (0)