File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -44,6 +44,17 @@ def test_issue3297(self):
4444 self .assertEqual (len (d ['a' ]), len (d ['b' ]))
4545 self .assertEqual (ascii (d ['a' ]), ascii (d ['b' ]))
4646
47+ def test_issue7820 (self ):
48+ # Ensure that check_bom() restores all bytes in the right order if
49+ # check_bom() fails in pydebug mode: a buffer starts with the first
50+ # byte of a valid BOM, but next bytes are different
51+
52+ # one byte in common with the UTF-16-LE BOM
53+ self .assertRaises (SyntaxError , eval , b'\xff \x20 ' )
54+
55+ # two bytes in common with the UTF-8 BOM
56+ self .assertRaises (SyntaxError , eval , b'\xef \xbb \x20 ' )
57+
4758def test_main ():
4859 support .run_unittest (PEP263Test )
4960
Original file line number Diff line number Diff line change @@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
1212Core and Builtins
1313-----------------
1414
15+ - Issue #7820: The parser tokenizer restores all bytes in the right order if
16+ the BOM check fails.
17+
1518- Handle errors from looking up __prepare__ correctly.
1619
1720- Issue #5939: Add additional runtime checking to ensure a valid capsule
Original file line number Diff line number Diff line change @@ -318,46 +318,51 @@ check_bom(int get_char(struct tok_state *),
318318 int set_readline (struct tok_state * , const char * ),
319319 struct tok_state * tok )
320320{
321- int ch = get_char (tok );
321+ int ch1 , ch2 , ch3 ;
322+ ch1 = get_char (tok );
322323 tok -> decoding_state = STATE_RAW ;
323- if (ch == EOF ) {
324+ if (ch1 == EOF ) {
324325 return 1 ;
325- } else if (ch == 0xEF ) {
326- ch = get_char (tok );
327- if (ch != 0xBB ) {
328- unget_char (ch , tok );
329- unget_char (0xEF , tok );
330- /* any token beginning with '\xEF' is a bad token */
326+ } else if (ch1 == 0xEF ) {
327+ ch2 = get_char (tok );
328+ if (ch2 != 0xBB ) {
329+ unget_char (ch2 , tok );
330+ unget_char (ch1 , tok );
331331 return 1 ;
332332 }
333- ch = get_char (tok );
334- if (ch != 0xBF ) {
335- unget_char (ch , tok );
336- unget_char (0xBB , tok );
337- unget_char (0xEF , tok );
338- /* any token beginning with '\xEF' is a bad token */
333+ ch3 = get_char (tok );
334+ if (ch3 != 0xBF ) {
335+ unget_char (ch3 , tok );
336+ unget_char (ch2 , tok );
337+ unget_char (ch1 , tok );
339338 return 1 ;
340339 }
341340#if 0
342341 /* Disable support for UTF-16 BOMs until a decision
343342 is made whether this needs to be supported. */
344- } else if (ch == 0xFE ) {
345- ch = get_char (tok );
346- if (ch != 0xFF )
347- goto NON_BOM ;
343+ } else if (ch1 == 0xFE ) {
344+ ch2 = get_char (tok );
345+ if (ch2 != 0xFF ) {
346+ unget_char (ch2 , tok );
347+ unget_char (ch1 , tok );
348+ return 1 ;
349+ }
348350 if (!set_readline (tok , "utf-16-be" ))
349351 return 0 ;
350352 tok -> decoding_state = STATE_NORMAL ;
351- } else if (ch == 0xFF ) {
352- ch = get_char (tok );
353- if (ch != 0xFE )
354- goto NON_BOM ;
353+ } else if (ch1 == 0xFF ) {
354+ ch2 = get_char (tok );
355+ if (ch2 != 0xFE ) {
356+ unget_char (ch2 , tok );
357+ unget_char (ch1 , tok );
358+ return 1 ;
359+ }
355360 if (!set_readline (tok , "utf-16-le" ))
356361 return 0 ;
357362 tok -> decoding_state = STATE_NORMAL ;
358363#endif
359364 } else {
360- unget_char (ch , tok );
365+ unget_char (ch1 , tok );
361366 return 1 ;
362367 }
363368 if (tok -> encoding != NULL )
You can’t perform that action at this time.
0 commit comments