Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c33f3f2

Browse files
committed
Issue #14629: Mention the filename in SyntaxError exceptions from
tokenize.detect_encoding() (when available).
1 parent dd9a569 commit c33f3f2

3 files changed

Lines changed: 51 additions & 3 deletions

File tree

Lib/test/test_tokenize.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,35 @@ def test_open(self):
904904
self.assertEqual(fp.encoding, 'utf-8-sig')
905905
self.assertEqual(fp.mode, 'r')
906906

907+
def test_filename_in_exception(self):
908+
# When possible, include the file name in the exception.
909+
path = 'some_file_path'
910+
lines = (
911+
b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
912+
)
913+
class Bunk:
    """Minimal file-like stub that serves a fixed sequence of lines.

    It exposes a ``name`` attribute and a bound ``readline`` method, which
    is what ``detect_encoding`` inspects (``readline.__self__.name``) when
    building its error message.
    """

    def __init__(self, lines, path):
        # `path` is stored as `name`, the attribute looked up through
        # `readline.__self__`.
        self.name = path
        self._lines = lines
        self._index = 0

    def readline(self):
        """Return the next stored line; raise StopIteration when exhausted."""
        # Read from the instance's own lines rather than a variable that
        # happens to be visible in an enclosing scope.
        if self._index == len(self._lines):
            raise StopIteration
        line = self._lines[self._index]
        self._index += 1
        return line
925+
926+
with self.assertRaises(SyntaxError):
927+
ins = Bunk(lines, path)
928+
# Make sure lacking a name isn't an issue.
929+
del ins.name
930+
detect_encoding(ins.readline)
931+
with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
932+
ins = Bunk(lines, path)
933+
detect_encoding(ins.readline)
934+
935+
907936
class TestTokenize(TestCase):
908937

909938
def test_tokenize(self):

Lib/tokenize.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,10 @@ def detect_encoding(readline):
353353
354354
If no encoding is specified, then the default of 'utf-8' will be returned.
355355
"""
356+
try:
357+
filename = readline.__self__.name
358+
except AttributeError:
359+
filename = None
356360
bom_found = False
357361
encoding = None
358362
default = 'utf-8'
@@ -369,7 +373,10 @@ def find_cookie(line):
369373
# per default encoding.
370374
line_string = line.decode('utf-8')
371375
except UnicodeDecodeError:
372-
raise SyntaxError("invalid or missing encoding declaration")
376+
msg = "invalid or missing encoding declaration"
377+
if filename is not None:
378+
msg = '{} for {!r}'.format(msg, filename)
379+
raise SyntaxError(msg)
373380

374381
matches = cookie_re.findall(line_string)
375382
if not matches:
@@ -379,12 +386,21 @@ def find_cookie(line):
379386
codec = lookup(encoding)
380387
except LookupError:
381388
# This behaviour mimics the Python interpreter
382-
raise SyntaxError("unknown encoding: " + encoding)
389+
if filename is None:
390+
msg = "unknown encoding: " + encoding
391+
else:
392+
msg = "unknown encoding for {!r}: {}".format(filename,
393+
encoding)
394+
raise SyntaxError(msg)
383395

384396
if bom_found:
385397
if codec.name != 'utf-8':
386398
# This behaviour mimics the Python interpreter
387-
raise SyntaxError('encoding problem: utf-8')
399+
if filename is None:
400+
msg = 'encoding problem: utf-8'
401+
else:
402+
msg = 'encoding problem for {!r}: utf-8'.format(filename)
403+
raise SyntaxError(msg)
388404
encoding += '-sig'
389405
return encoding
390406

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ Core and Builtins
5555
Library
5656
-------
5757

58+
- Issue #14629: tokenize.detect_encoding will specify the filename in the
59+
SyntaxError exception if found at readline.__self__.name.
60+
5861
- Issue #14629: Raise SyntaxError in tokenize.detect_encoding if the
5962
first two lines have non-UTF-8 characters without an encoding declaration.
6063

0 commit comments

Comments
 (0)