Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6aa278e

Browse files
author
Victor Stinner
committed
Merged revisions 78603 via svnmerge from
svn+ssh://[email protected]/python/trunk ........ r78603 | victor.stinner | 2010-03-03 00:20:02 +0100 (mer., 03 mars 2010) | 5 lines Issue #7820: The parser tokenizer restores all bytes in the right order if the BOM check fails. Fix an assertion in pydebug mode. ........
1 parent 683a7e7 commit 6aa278e

3 files changed

Lines changed: 42 additions & 23 deletions

File tree

Lib/test/test_pep263.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,17 @@ def test_issue3297(self):
4444
self.assertEqual(len(d['a']), len(d['b']))
4545
self.assertEqual(ascii(d['a']), ascii(d['b']))
4646

47+
def test_issue7820(self):
48+
# Ensure that check_bom() restores all bytes in the right order if
49+
# check_bom() fails in pydebug mode: a buffer starts with the first
50+
# byte of a valid BOM, but next bytes are different
51+
52+
# one byte in common with the UTF-16-LE BOM
53+
self.assertRaises(SyntaxError, eval, b'\xff\x20')
54+
55+
# two bytes in common with the UTF-8 BOM
56+
self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
57+
4758
def test_main():
4859
support.run_unittest(PEP263Test)
4960

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #7820: The parser tokenizer restores all bytes in the right order if
16+
the BOM check fails.
17+
1518
- Handle errors from looking up __prepare__ correctly.
1619

1720
- Issue #5939: Add additional runtime checking to ensure a valid capsule

Parser/tokenizer.c

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -318,46 +318,51 @@ check_bom(int get_char(struct tok_state *),
318318
int set_readline(struct tok_state *, const char *),
319319
struct tok_state *tok)
320320
{
321-
int ch = get_char(tok);
321+
int ch1, ch2, ch3;
322+
ch1 = get_char(tok);
322323
tok->decoding_state = STATE_RAW;
323-
if (ch == EOF) {
324+
if (ch1 == EOF) {
324325
return 1;
325-
} else if (ch == 0xEF) {
326-
ch = get_char(tok);
327-
if (ch != 0xBB) {
328-
unget_char(ch, tok);
329-
unget_char(0xEF, tok);
330-
/* any token beginning with '\xEF' is a bad token */
326+
} else if (ch1 == 0xEF) {
327+
ch2 = get_char(tok);
328+
if (ch2 != 0xBB) {
329+
unget_char(ch2, tok);
330+
unget_char(ch1, tok);
331331
return 1;
332332
}
333-
ch = get_char(tok);
334-
if (ch != 0xBF) {
335-
unget_char(ch, tok);
336-
unget_char(0xBB, tok);
337-
unget_char(0xEF, tok);
338-
/* any token beginning with '\xEF' is a bad token */
333+
ch3 = get_char(tok);
334+
if (ch3 != 0xBF) {
335+
unget_char(ch3, tok);
336+
unget_char(ch2, tok);
337+
unget_char(ch1, tok);
339338
return 1;
340339
}
341340
#if 0
342341
/* Disable support for UTF-16 BOMs until a decision
343342
is made whether this needs to be supported. */
344-
} else if (ch == 0xFE) {
345-
ch = get_char(tok);
346-
if (ch != 0xFF)
347-
goto NON_BOM;
343+
} else if (ch1 == 0xFE) {
344+
ch2 = get_char(tok);
345+
if (ch2 != 0xFF) {
346+
unget_char(ch2, tok);
347+
unget_char(ch1, tok);
348+
return 1;
349+
}
348350
if (!set_readline(tok, "utf-16-be"))
349351
return 0;
350352
tok->decoding_state = STATE_NORMAL;
351-
} else if (ch == 0xFF) {
352-
ch = get_char(tok);
353-
if (ch != 0xFE)
354-
goto NON_BOM;
353+
} else if (ch1 == 0xFF) {
354+
ch2 = get_char(tok);
355+
if (ch2 != 0xFE) {
356+
unget_char(ch2, tok);
357+
unget_char(ch1, tok);
358+
return 1;
359+
}
355360
if (!set_readline(tok, "utf-16-le"))
356361
return 0;
357362
tok->decoding_state = STATE_NORMAL;
358363
#endif
359364
} else {
360-
unget_char(ch, tok);
365+
unget_char(ch1, tok);
361366
return 1;
362367
}
363368
if (tok->encoding != NULL)

0 commit comments

Comments
 (0)