Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-41872: Parse docstrings with ast instead of string manipulation #127520

Merged
merged 29 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
9e0835c
Instead of manual parsing of docstrings, use ast.literal_eval to par…
srinivasreddy Dec 2, 2024
d9fe8dd
Add tests for doc strings
srinivasreddy Dec 2, 2024
ab176ab
Remove unnecessary comments
srinivasreddy Dec 2, 2024
c21f342
Fix typo
srinivasreddy Dec 2, 2024
d750ee5
Update Lib/pydoc.py
srinivasreddy Dec 3, 2024
38b13e5
Add breathing space
srinivasreddy Dec 3, 2024
4bb399d
Fix name collision / shadowing
srinivasreddy Dec 3, 2024
272ca67
Change the logic
srinivasreddy Dec 3, 2024
5ef8adb
Fix typo
srinivasreddy Dec 3, 2024
e306542
Update Lib/pydoc.py
srinivasreddy Dec 5, 2024
6ff1ac3
Address review comments
srinivasreddy Dec 5, 2024
8c073a5
Correct the logic
srinivasreddy Dec 5, 2024
91df3f0
Merge branch 'main' into gh-41872
srinivasreddy Dec 17, 2024
f122cc6
Add blurb
srinivasreddy Dec 17, 2024
08cf0a5
Merge branch 'main' into gh-41872
srinivasreddy Dec 18, 2024
5e6a78b
Remove the redundant binary file check
srinivasreddy Dec 18, 2024
f54711e
Update test cases
srinivasreddy Dec 18, 2024
fb5dc83
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
bd0e7eb
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
233fbd6
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
ec7e431
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
c004506
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Dec 18, 2024
204e5bb
Add commas
srinivasreddy Dec 19, 2024
dae74dd
Merge branch 'main' into gh-41872
srinivasreddy Jan 6, 2025
54519d9
Address review comments
srinivasreddy Jan 6, 2025
835fea2
Update Lib/test/test_pydoc/test_pydoc.py
srinivasreddy Jan 6, 2025
b7c42ea
Handle concatenated string, parentheses, newlines, and add more tests.
serhiy-storchaka Jan 7, 2025
6d6e983
Add more tests. Refactor tests.
serhiy-storchaka Jan 8, 2025
511b5d0
Update a NEWS entry.
serhiy-storchaka Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions Lib/pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class or function within a module or module in a package. If the
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.

import ast
srinivasreddy marked this conversation as resolved.
Show resolved Hide resolved
import __future__
import builtins
import importlib._bootstrap
Expand Down Expand Up @@ -384,21 +385,20 @@ def ispackage(path):
return False

def source_synopsis(file):
    """Return the one-line summary of a file object, if present.

    *file* is a text-mode file object; it is read through its ``readline``
    method and tokenized as Python source.  The first logical line must
    consist solely of string literals (optionally wrapped in parentheses
    and implicitly concatenated) for it to count as the docstring.

    Returns the first line of the stripped docstring, or ``None`` when the
    source does not start with a ``str`` literal or cannot be tokenized or
    evaluated.
    """
    # Tokens that may legitimately appear before or between docstring parts.
    skippable = (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING)
    pieces = []
    try:
        for tok_type, tok_string, _, _, _ in tokenize.generate_tokens(file.readline):
            if tok_type == tokenize.STRING:
                pieces.append(tok_string)
            elif tok_type == tokenize.OP and tok_string in ('(', ')'):
                # Keep parentheses so '("a"\n "b")' evaluates as one literal.
                pieces.append(tok_string)
            elif tok_type == tokenize.NEWLINE or (
                    tok_type == tokenize.OP and tok_string == ';'):
                # End of the first statement: everything needed is collected.
                break
            elif tok_type not in skippable:
                # Any other token means the first statement is not a
                # bare string expression, hence not a docstring.
                return None
        if not pieces:
            return None
        # Join with spaces so adjacent literals such as '""' and '"a"'
        # cannot fuse into a different (triple-quoted) token.
        docstring = ast.literal_eval(' '.join(pieces))
    except (tokenize.TokenError, UnicodeDecodeError, SyntaxError, ValueError):
        return None
    if not isinstance(docstring, str):
        # e.g. a bytes literal or an empty pair of parentheses.
        return None
    return docstring.strip().split('\n')[0].strip()

def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""
Expand Down
51 changes: 51 additions & 0 deletions Lib/test/test_pydoc/test_pydoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,57 @@ def test_synopsis(self):
synopsis = pydoc.synopsis(TESTFN, {})
self.assertEqual(synopsis, 'line 1: h\xe9')

def test_source_synopsis(self):
    """Check pydoc.source_synopsis() on in-memory sources and on a real file."""
    # Each entry pairs a module source with the synopsis expected from it.
    expectations = [
        ('"""Single line docstring."""', "Single line docstring."),
        ('"""First line of docstring.\nSecond line.\nThird line."""',
         "First line of docstring."),
        ('"""   Whitespace around docstring.   """',
         "Whitespace around docstring."),
        # A module without a docstring yields no synopsis.
        ('x = 1\ny = 2', None),
        ('# Comment\n"""Docstring after comment."""',
         "Docstring after comment."),
        # An empty docstring yields an empty synopsis.
        ('""""""', ""),
        ('"""Café and résumé."""', "Café and résumé."),
        ("'''Triple single quotes'''", "Triple single quotes"),
        ('"Single double quotes"', "Single double quotes"),
        ("'Single single quotes'", 'Single single quotes'),
        ('"""Concatenated""" \\\n"string" \'literals\'',
         "Concatenatedstringliterals"),
    ]

    for source, expected in expectations:
        with self.subTest(source=source):
            self.assertEqual(pydoc.source_synopsis(StringIO(source)), expected)

    # Same check through a file object backed by an actual file on disk.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
        temp_file.write('"""Real file test."""\n')
        temp_file.flush()
        temp_file.seek(0)
        synopsis_from_file = pydoc.source_synopsis(temp_file)
        self.assertEqual(synopsis_from_file, "Real file test.")

@requires_docstrings
def test_synopsis_sourceless(self):
os = import_helper.import_fresh_module('os')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Parse module docstrings in pydoc.source_synopsis() with tokenize and ast.literal_eval() instead of manual string manipulation.
Loading