File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -904,6 +904,35 @@ def test_open(self):
904904 self .assertEqual (fp .encoding , 'utf-8-sig' )
905905 self .assertEqual (fp .mode , 'r' )
906906
def test_filename_in_exception(self):
    """detect_encoding() names the source file in its SyntaxError if it can.

    The input line contains a byte sequence that is not valid UTF-8 and
    carries no encoding declaration, so detect_encoding() must raise
    SyntaxError.  When the object owning the bound ``readline`` has a
    ``name`` attribute the message is expected to contain it; when the
    attribute is missing a SyntaxError must still be raised.
    """
    path = 'some_file_path'
    lines = (
        b'print("\xdf ")',  # Latin-1: LATIN SMALL LETTER SHARP S
    )

    class Bunk:
        """Minimal file stand-in exposing only ``name`` and ``readline``."""

        def __init__(self, lines, path):
            self.name = path
            self._lines = lines
            self._index = 0

        def readline(self):
            # Read from the instance's own lines rather than the enclosing
            # test's local variable that happens to share the name.
            if self._index == len(self._lines):
                raise StopIteration
            line = self._lines[self._index]
            self._index += 1
            return line

    # Make sure lacking a name isn't an issue.  The fixture is prepared
    # outside the assertRaises block so that only detect_encoding() can
    # satisfy the expected SyntaxError.
    ins = Bunk(lines, path)
    del ins.name
    with self.assertRaises(SyntaxError):
        detect_encoding(ins.readline)

    # With a name available, the message must mention it.
    ins = Bunk(lines, path)
    with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
        detect_encoding(ins.readline)
934+
935+
907936class TestTokenize (TestCase ):
908937
909938 def test_tokenize (self ):
Original file line number Diff line number Diff line change @@ -353,6 +353,10 @@ def detect_encoding(readline):
353353
354354 If no encoding is specified, then the default of 'utf-8' will be returned.
355355 """
356+ try :
357+ filename = readline .__self__ .name
358+ except AttributeError :
359+ filename = None
356360 bom_found = False
357361 encoding = None
358362 default = 'utf-8'
@@ -369,7 +373,10 @@ def find_cookie(line):
369373 # per default encoding.
370374 line_string = line .decode ('utf-8' )
371375 except UnicodeDecodeError :
372- raise SyntaxError ("invalid or missing encoding declaration" )
376+ msg = "invalid or missing encoding declaration"
377+ if filename is not None :
378+ msg = '{} for {!r}' .format (msg , filename )
379+ raise SyntaxError (msg )
373380
374381 matches = cookie_re .findall (line_string )
375382 if not matches :
@@ -379,12 +386,21 @@ def find_cookie(line):
379386 codec = lookup (encoding )
380387 except LookupError :
381388 # This behaviour mimics the Python interpreter
382- raise SyntaxError ("unknown encoding: " + encoding )
389+ if filename is None :
390+ msg = "unknown encoding: " + encoding
391+ else :
392+ msg = "unknown encoding for {!r}: {}" .format (filename ,
393+ encoding )
394+ raise SyntaxError (msg )
383395
384396 if bom_found :
385397 if codec .name != 'utf-8' :
386398 # This behaviour mimics the Python interpreter
387- raise SyntaxError ('encoding problem: utf-8' )
399+ if filename is None :
400+ msg = 'encoding problem: utf-8'
401+ else :
402+ msg = 'encoding problem for {!r}: utf-8' .format (filename )
403+ raise SyntaxError (msg )
388404 encoding += '-sig'
389405 return encoding
390406
Original file line number Diff line number Diff line change @@ -55,6 +55,9 @@ Core and Builtins
5555Library
5656-------
5757
58+ - Issue #14629: tokenize.detect_encoding now includes the filename in the
59+ SyntaxError message when it is available as readline.__self__.name.
60+
5861- Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
5962 first two lines have non-UTF-8 characters without an encoding declaration.
6063
You can’t perform that action at this time.
0 commit comments