Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0d44111

Browse files
Issue #25388: Fixed tokenizer crash when processing undecodable source code
with a null byte.
1 parent 806fb25 commit 0d44111

3 files changed

Lines changed: 19 additions & 8 deletions

File tree

Lib/test/test_compile.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,16 @@ def test_particularly_evil_undecodable(self):
504504
res = script_helper.run_python_until_end(fn)[0]
505505
self.assertIn(b"Non-UTF-8", res.err)
506506

507+
def test_yet_more_evil_still_undecodable(self):
508+
# Issue #25388
509+
src = b"#\x00\n#\xfd\n"
510+
with tempfile.TemporaryDirectory() as tmpd:
511+
fn = os.path.join(tmpd, "bad.py")
512+
with open(fn, "wb") as fp:
513+
fp.write(src)
514+
res = script_helper.run_python_until_end(fn)[0]
515+
self.assertIn(b"Non-UTF-8", res.err)
516+
507517
@support.cpython_only
508518
def test_compiler_recursion_limit(self):
509519
# Expected limit is sys.getrecursionlimit() * the scaling factor

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ Release date: tba
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #25388: Fixed tokenizer crash when processing undecodable source code
14+
with a null byte.
15+
1316
- Issue #22995: Default implementation of __reduce__ and __reduce_ex__ now
1417
rejects builtin types with not defined __new__.
1518

Parser/tokenizer.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ error_ret(struct tok_state *tok) /* XXX */
187187
tok->decoding_erred = 1;
188188
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
189189
PyMem_FREE(tok->buf);
190-
tok->buf = NULL;
190+
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
191+
tok->done = E_DECODE;
191192
return NULL; /* as if it were EOF */
192193
}
193194

@@ -943,11 +944,6 @@ tok_nextc(struct tok_state *tok)
943944
}
944945
buflen = PyBytes_GET_SIZE(u);
945946
buf = PyBytes_AS_STRING(u);
946-
if (!buf) {
947-
Py_DECREF(u);
948-
tok->done = E_DECODE;
949-
return EOF;
950-
}
951947
newtok = PyMem_MALLOC(buflen+1);
952948
strcpy(newtok, buf);
953949
Py_DECREF(u);
@@ -989,7 +985,6 @@ tok_nextc(struct tok_state *tok)
989985
if (tok->buf != NULL)
990986
PyMem_FREE(tok->buf);
991987
tok->buf = newtok;
992-
tok->line_start = tok->buf;
993988
tok->cur = tok->buf;
994989
tok->line_start = tok->buf;
995990
tok->inp = strchr(tok->buf, '\0');
@@ -1012,7 +1007,8 @@ tok_nextc(struct tok_state *tok)
10121007
}
10131008
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
10141009
tok) == NULL) {
1015-
tok->done = E_EOF;
1010+
if (!tok->decoding_erred)
1011+
tok->done = E_EOF;
10161012
done = 1;
10171013
}
10181014
else {
@@ -1046,6 +1042,8 @@ tok_nextc(struct tok_state *tok)
10461042
return EOF;
10471043
}
10481044
tok->buf = newbuf;
1045+
tok->cur = tok->buf + cur;
1046+
tok->line_start = tok->cur;
10491047
tok->inp = tok->buf + curvalid;
10501048
tok->end = tok->buf + newsize;
10511049
tok->start = curstart < 0 ? NULL :

0 commit comments

Comments
 (0)