Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit dda55cc

Browse files
miss-islington, srinivasreddy, merwok, serhiy-storchaka
authored
[3.13] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128620)
It now supports docstrings with single quotes, escape sequences, raw string literals, and other Python syntax.

(cherry picked from commit 474e419)

Co-authored-by: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) <[email protected]>
Co-authored-by: Éric <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent c0ba0bc commit dda55cc

File tree

3 files changed

+104
-15
lines changed

3 files changed

+104
-15
lines changed

Lib/pydoc.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class or function within a module or module in a package. If the
5454
# the current directory is changed with os.chdir(), an incorrect
5555
# path will be displayed.
5656

57+
import ast
5758
import __future__
5859
import builtins
5960
import importlib._bootstrap
@@ -381,21 +382,29 @@ def ispackage(path):
381382
return False
382383

383384
def source_synopsis(file):
384-
line = file.readline()
385-
while line[:1] == '#' or not line.strip():
386-
line = file.readline()
387-
if not line: break
388-
line = line.strip()
389-
if line[:4] == 'r"""': line = line[1:]
390-
if line[:3] == '"""':
391-
line = line[3:]
392-
if line[-1:] == '\\': line = line[:-1]
393-
while not line.strip():
394-
line = file.readline()
395-
if not line: break
396-
result = line.split('"""')[0].strip()
397-
else: result = None
398-
return result
385+
"""Return the one-line summary of a file object, if present"""
386+
387+
string = ''
388+
try:
389+
tokens = tokenize.generate_tokens(file.readline)
390+
for tok_type, tok_string, _, _, _ in tokens:
391+
if tok_type == tokenize.STRING:
392+
string += tok_string
393+
elif tok_type == tokenize.NEWLINE:
394+
with warnings.catch_warnings():
395+
# Ignore the "invalid escape sequence" warning.
396+
warnings.simplefilter("ignore", SyntaxWarning)
397+
docstring = ast.literal_eval(string)
398+
if not isinstance(docstring, str):
399+
return None
400+
return docstring.strip().split('\n')[0].strip()
401+
elif tok_type == tokenize.OP and tok_string in ('(', ')'):
402+
string += tok_string
403+
elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
404+
return None
405+
except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
406+
return None
407+
return None
399408

400409
def synopsis(filename, cache={}):
401410
"""Get the one-line summary out of a module file."""

Lib/test/test_pydoc/test_pydoc.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import contextlib
55
import importlib.util
66
import inspect
7+
import io
78
import pydoc
89
import py_compile
910
import keyword
@@ -879,6 +880,82 @@ def test_synopsis(self):
879880
synopsis = pydoc.synopsis(TESTFN, {})
880881
self.assertEqual(synopsis, 'line 1: h\xe9')
881882

883+
def test_source_synopsis(self):
884+
def check(source, expected, encoding=None):
885+
if isinstance(source, str):
886+
source_file = StringIO(source)
887+
else:
888+
source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
889+
with source_file:
890+
result = pydoc.source_synopsis(source_file)
891+
self.assertEqual(result, expected)
892+
893+
check('"""Single line docstring."""',
894+
'Single line docstring.')
895+
check('"""First line of docstring.\nSecond line.\nThird line."""',
896+
'First line of docstring.')
897+
check('"""First line of docstring.\\nSecond line.\\nThird line."""',
898+
'First line of docstring.')
899+
check('""" Whitespace around docstring. """',
900+
'Whitespace around docstring.')
901+
check('import sys\n"""No docstring"""',
902+
None)
903+
check(' \n"""Docstring after empty line."""',
904+
'Docstring after empty line.')
905+
check('# Comment\n"""Docstring after comment."""',
906+
'Docstring after comment.')
907+
check(' # Indented comment\n"""Docstring after comment."""',
908+
'Docstring after comment.')
909+
check('""""""', # Empty docstring
910+
'')
911+
check('', # Empty file
912+
None)
913+
check('"""Embedded\0null byte"""',
914+
None)
915+
check('"""Embedded null byte"""\0',
916+
None)
917+
check('"""Café and résumé."""',
918+
'Café and résumé.')
919+
check("'''Triple single quotes'''",
920+
'Triple single quotes')
921+
check('"Single double quotes"',
922+
'Single double quotes')
923+
check("'Single single quotes'",
924+
'Single single quotes')
925+
check('"""split\\\nline"""',
926+
'splitline')
927+
check('"""Unrecognized escape \\sequence"""',
928+
'Unrecognized escape \\sequence')
929+
check('"""Invalid escape seq\\uence"""',
930+
None)
931+
check('r"""Raw \\stri\\ng"""',
932+
'Raw \\stri\\ng')
933+
check('b"""Bytes literal"""',
934+
None)
935+
check('f"""f-string"""',
936+
None)
937+
check('"""Concatenated""" \\\n"string" \'literals\'',
938+
'Concatenatedstringliterals')
939+
check('"""String""" + """expression"""',
940+
None)
941+
check('("""In parentheses""")',
942+
'In parentheses')
943+
check('("""Multiple lines """\n"""in parentheses""")',
944+
'Multiple lines in parentheses')
945+
check('()', # tuple
946+
None)
947+
check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
948+
'€uro sign', encoding='iso-8859-15')
949+
check(b'"""\xa4"""', # Decoding error
950+
None, encoding='utf-8')
951+
952+
with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
953+
temp_file.write('"""Real file test."""\n')
954+
temp_file.flush()
955+
temp_file.seek(0)
956+
result = pydoc.source_synopsis(temp_file)
957+
self.assertEqual(result, "Real file test.")
958+
882959
@requires_docstrings
883960
def test_synopsis_sourceless(self):
884961
os = import_helper.import_fresh_module('os')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
2+
It now supports docstrings with single quotes, escape sequences,
3+
raw string literals, and other Python syntax.

0 commit comments

Comments
 (0)