diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 7eb7355..9ad7f68 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -5,23 +5,39 @@ on: types: [created] jobs: - deploy: + build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 - name: Install build dependencies run: | - python -m pip install --upgrade pip wheel + python -m pip install --upgrade pip wheel build - name: Build package run: | - python setup.py sdist bdist_wheel - - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@v1.4.1 + python -m build + - name: Upload Artifact + uses: actions/upload-artifact@v4 with: - user: __token__ - password: ${{ secrets.PYPI_UPLOAD_API_TOKEN }} - + name: docstring-to-markdown dist ${{ github.run_number }} + path: ./dist + pypi-publish: + name: Upload release to PyPI + runs-on: ubuntu-latest + needs: [build] + environment: + name: pypi + url: https://pypi.org/p/docstring-to-markdown + permissions: + id-token: write + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: docstring-to-markdown dist ${{ github.run_number }} + path: ./dist + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ccb4e8..8bd7e07 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,20 +11,17 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7, 3.8, 3.9, '3.10', '3.11'] - include: - - os: ubuntu-20.04 - python-version: 3.6 + python-version: [3.8, 3.9, '3.10', '3.11', '3.12', '3.13'] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ 
matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install test dependencies run: | - python -m pip install --upgrade pip wheel + python -m pip install --upgrade pip wheel build python -m pip install -r requirements-dev.txt - name: Temporary installation run: python -m pip install -e . @@ -36,8 +33,8 @@ jobs: mypy docstring_to_markdown - name: Build package run: | - python setup.py sdist bdist_wheel + python -m build - name: Install package - run: python -m pip install --find-links=dist --no-index --ignore-installed docstring_to_markdown + run: python -m pip install --find-links=dist --ignore-installed docstring_to_markdown - name: Pip check run: python -m pip check diff --git a/README.md b/README.md index 2f09ef6..97a3703 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ On the fly conversion of Python docstrings to markdown -- Python 3.6+ -- currently can recognise reStructuredText and convert multiple of its features to Markdown -- in the future will be able to convert Google docstrings too +- Python 3.7+ (tested on 3.8 up to 3.13) +- can recognise reStructuredText and convert multiple of its features to Markdown +- since v0.13 includes initial support for Google-formatted docstrings ### Installation @@ -16,7 +16,6 @@ On the fly conversion of Python docstrings to markdown pip install docstring-to-markdown ``` - ### Example Convert reStructuredText: @@ -36,6 +35,11 @@ Traceback (most recent call last): docstring_to_markdown.UnknownFormatError ``` +### Extensibility + +The `docstring_to_markdown` entry point group allows adding custom converters which follow the `Converter` protocol. +The built-in converters can be customized by providing an entry point with a matching name.
+ ### Development ```bash
diff --git a/docstring_to_markdown/__init__.py b/docstring_to_markdown/__init__.py
index c81e124..a70ed84 100644
--- a/docstring_to_markdown/__init__.py
+++ b/docstring_to_markdown/__init__.py
@@ -1,13 +1,72 @@
-from .rst import looks_like_rst, rst_to_markdown
+from importlib_metadata import entry_points
+from typing import List, TYPE_CHECKING
 
-__version__ = "0.12"
+from .types import Converter
+
+if TYPE_CHECKING:
+    from importlib_metadata import EntryPoint
+
+__version__ = "0.17"
 
 
 class UnknownFormatError(Exception):
     pass
 
 
+def _entry_points_sort_key(entry_point: 'EntryPoint'):
+    """Sort key: 0 for this package's own entry points, 1 for all others."""
+    if entry_point.dist is None:
+        return 1
+    if entry_point.dist.name == "docstring-to-markdown":
+        return 0
+    return 1
+
+
+def _load_converters() -> List[Converter]:
+    """Instantiate one converter per entry point name, highest priority first.
+
+    Entry points are read from the ``docstring_to_markdown`` group. When
+    several share a name, the one sorted last wins, so an entry point from
+    another distribution replaces the built-in one of the same name.
+    """
+    converter_entry_points = entry_points(
+        group="docstring_to_markdown"
+    )
+    # sort so that the default ones can be overridden
+    sorted_entry_points = sorted(
+        converter_entry_points,
+        key=_entry_points_sort_key
+    )
+    # de-duplicate
+    unique_entry_points = {}
+    for entry_point in sorted_entry_points:
+        unique_entry_points[entry_point.name] = entry_point
+
+    converters = []
+    for entry_point in unique_entry_points.values():
+        converter_class = entry_point.load()
+        converters.append(converter_class())
+
+    converters.sort(key=lambda converter: -converter.priority)
+
+    return converters
+
+
+# Converter instances; loaded on the first call to convert()
+_CONVERTERS = None
+
+
 def convert(docstring: str) -> str:
-    if looks_like_rst(docstring):
-        return rst_to_markdown(docstring)
+    """Convert ``docstring`` with the first converter whose ``can_convert`` accepts it.
+
+    Raises:
+        UnknownFormatError: if no loaded converter accepts the docstring.
+    """
+    global _CONVERTERS
+    if _CONVERTERS is None:
+        _CONVERTERS = _load_converters()
+    for converter in _CONVERTERS:
+        if converter.can_convert(docstring):
+            return converter.convert(docstring)
+
     raise UnknownFormatError()
diff --git a/docstring_to_markdown/_utils.py b/docstring_to_markdown/_utils.py
new file mode 100644
index 0000000..847c699
--- /dev/null
+++ b/docstring_to_markdown/_utils.py
@@ -0,0 +1,6 @@
+from re import sub
+
+
+def escape_markdown(text: str) -> str:
+    """Prefix every backslash, ``#``, ``*``, ``_``, ``[`` and ``]`` in ``text`` with a backslash."""
+    return sub(r'([\\#*_[\]])', r'\\\1', text)
diff --git a/docstring_to_markdown/cpython.py b/docstring_to_markdown/cpython.py
new file mode 100644
index 0000000..974ea60
--- /dev/null
+++ b/docstring_to_markdown/cpython.py
@@ -0,0 +1,80 @@
+from typing import Union, List
+from re import fullmatch
+
+from .types import Converter
+from ._utils import escape_markdown
+
+
+def _is_cpython_signature_line(line: str) -> bool:
+    """CPython uses signature lines in the following format:
+
+    str(bytes_or_buffer[, encoding[, errors]]) -> str
+    """
+    return fullmatch(r'\w+\(\S*(, \S+)*(\[, \S+\])*\)\s--?>\s.+', line) is not None
+
+
+def cpython_to_markdown(text: str) -> Union[str, None]:
+    """Convert a CPython-style docstring to Markdown.
+
+    Signature lines at the start of ``text``, and any later line starting
+    with a space, go into a fenced code block; all other lines follow the
+    block with Markdown characters escaped.
+
+    Returns:
+        The Markdown text, or ``None`` when ``text`` is empty or does not
+        start with a CPython signature line.
+    """
+    signature_lines: List[str] = []
+    other_lines: List[str] = []
+    for line in text.splitlines():
+        if not other_lines and _is_cpython_signature_line(line):
+            signature_lines.append(line)
+        elif not signature_lines:
+            return None
+        elif line.startswith(' '):
+            signature_lines.append(line)
+        else:
+            other_lines.append(line)
+    if not signature_lines:
+        # Empty text: report "not convertible" instead of returning
+        # an empty code block
+        return None
+    return '\n'.join([
+        '```',
+        '\n'.join(signature_lines),
+        '```',
+        escape_markdown('\n'.join(other_lines))
+    ])
+
+
+def looks_like_cpython(text: str) -> bool:
+    """Check whether ``text`` can be converted by ``cpython_to_markdown``."""
+    return cpython_to_markdown(text) is not None
+
+
+class CPythonConverter(Converter):
+    """Converter for CPython-style docstrings.
+
+    ``can_convert`` stores its result so that a following ``convert`` call
+    with the same docstring does not repeat the conversion.
+    """
+
+    priority = 10
+
+    def __init__(self) -> None:
+        self._last_docstring: Union[str, None] = None
+        self._converted: Union[str, None] = None
+
+    def can_convert(self, docstring):
+        self._last_docstring = docstring
+        self._converted = cpython_to_markdown(docstring)
+        return self._converted is not None
+
+    def convert(self, docstring):
+        if docstring != self._last_docstring:
+            self._last_docstring = docstring
+            self._converted = cpython_to_markdown(docstring)
+        return self._converted
+
+
+__all__ = ['looks_like_cpython', 'cpython_to_markdown', 'CPythonConverter']
diff --git a/docstring_to_markdown/google.py b/docstring_to_markdown/google.py new file mode 100644 index 0000000..156f3da --- /dev/null +++ b/docstring_to_markdown/google.py @@
-0,0 +1,209 @@
+import re
+from textwrap import dedent
+from typing import List
+
+from .types import Converter
+
+
+# All possible sections in Google style docstrings
+SECTION_HEADERS: List[str] = [
+    "Args",
+    "Returns",
+    "Raises",
+    "Yields",
+    "Example",
+    "Examples",
+    "Attributes",
+    "Note",
+    "Todo",
+]
+
+# These sections will not be parsed as a list of arguments/return values/etc
+PLAIN_TEXT_SECTIONS: List[str] = [
+    "Examples",
+    "Example",
+    "Note",
+    "Todo",
+]
+
+ESCAPE_RULES = {
+    # Avoid Markdown in magic methods or filenames like __init__.py
+    r"__(?P<text>\S+)__": r"\_\_\g<text>\_\_",
+}
+
+
+class Section:
+    """One named section of a Google-style docstring, rendered as Markdown."""
+
+    def __init__(self, name: str, content: str) -> None:
+        self.name = name
+        self.content = ""
+
+        self._parse(content)
+
+    def _parse(self, content: str) -> None:
+        """Fill ``self.content`` with the Markdown form of ``content``.
+
+        Plain-text sections are only dedented; any other section becomes
+        a bullet list with one item per entry.
+        """
+        content = content.rstrip("\n")
+
+        if self.name in PLAIN_TEXT_SECTIONS:
+            self.content = dedent(content)
+            return
+
+        parts = []
+        cur_part = []
+
+        for line in content.split("\n"):
+            # Drop one indentation level (four leading spaces); a line that
+            # is still indented afterwards continues the previous entry.
+            if line.startswith("    "):
+                line = line[4:]
+
+            if line.startswith(" "):
+                # Continuation from a multiline description
+                cur_part.append(line)
+                continue
+
+            if cur_part:
+                # Leaving multiline description
+                parts.append(cur_part)
+                cur_part = [line]
+            else:
+                # Entering new description part
+                cur_part.append(line)
+
+        # Last part
+        parts.append(cur_part)
+
+        # Format section
+        for part in parts:
+            indentation = ""
+            skip_first = False
+
+            if ":" in part[0]:
+                spl = part[0].split(":")
+
+                arg = spl[0]
+                description = ":".join(spl[1:]).lstrip()
+                # Width of "- `arg`: ", used to align continuation lines
+                indentation = (len(arg) + 6) * " "
+
+                if description:
+                    self.content += "- `{}`: {}\n".format(arg, description)
+                else:
+                    skip_first = True
+                    self.content += "- `{}`: ".format(arg)
+            else:
+                self.content += "- {}\n".format(part[0])
+
+            for n, line in enumerate(part[1:]):
+                if skip_first and n == 0:
+                    # This ensures that indented args get moved to the
+                    # previous line
+                    self.content += "{}\n".format(line.lstrip())
+                    continue
+
+                self.content += "{}{}\n".format(indentation, line.lstrip())
+
+        self.content = self.content.rstrip("\n")
+
+    def as_markdown(self) -> str:
+        """Return the section as a level-4 heading followed by its content."""
+        return "#### {}\n\n{}\n\n".format(self.name, self.content)
+
+
+class GoogleDocstring:
+    """A Google-style docstring split into a description and sections."""
+
+    def __init__(self, docstring: str) -> None:
+        self.sections: List[Section] = []
+        self.description: str = ""
+
+        self._parse(docstring)
+
+    def _parse(self, docstring: str) -> None:
+        """Split ``docstring`` into the leading description and its sections."""
+        self.sections = []
+        self.description = ""
+
+        buf = ""
+        cur_section = ""
+
+        for line in docstring.split("\n"):
+            if is_section(line):
+                # Entering new section
+                if cur_section:
+                    # Leaving previous section, save it and reset buffer
+                    self.sections.append(Section(cur_section, buf))
+                    buf = ""
+
+                # Remember currently parsed section
+                cur_section = line.rstrip(":")
+                continue
+
+            # Parse section content
+            if cur_section:
+                buf += line + "\n"
+            else:
+                # Before setting cur_section, we're parsing the function description
+                self.description += line + "\n"
+
+        # Last section; skipped when no header was found, otherwise an
+        # unnamed section with an empty bullet would be appended
+        if cur_section:
+            self.sections.append(Section(cur_section, buf))
+
+    def as_markdown(self) -> str:
+        """Return the description followed by all sections as Markdown."""
+        text = self.description
+
+        for section in self.sections:
+            text += section.as_markdown()
+
+        return text.rstrip("\n") + "\n"  # Only keep one last newline
+
+
+def is_section(line: str) -> bool:
+    """Check whether ``line`` contains a section header followed by a colon."""
+    for section in SECTION_HEADERS:
+        if re.search(r"{}:".format(section), line):
+            return True
+
+    return False
+
+
+def looks_like_google(value: str) -> bool:
+    """Check whether ``value`` contains a section header and colon directly followed by a newline."""
+    for section in SECTION_HEADERS:
+        if re.search(r"{}:\n".format(section), value):
+            return True
+
+    return False
+
+
+def google_to_markdown(text: str, extract_signature: bool = True) -> str:
+    """Convert a Google-style docstring to Markdown.
+
+    ``extract_signature`` is accepted but not used by this function.
+    """
+    # Escape parts we don't want to render
+    for pattern, replacement in ESCAPE_RULES.items():
+        text = re.sub(pattern, replacement, text)
+
+    docstring = GoogleDocstring(text)
+
+    return docstring.as_markdown()
+
+
+class GoogleConverter(Converter):
+
+    priority = 75
+
+    def can_convert(self, docstring):
+        return looks_like_google(docstring)
+
+    def convert(self, docstring):
+        return google_to_markdown(docstring)
diff --git a/docstring_to_markdown/plain.py b/docstring_to_markdown/plain.py
new file mode 100644
index 0000000..3c42253
--- /dev/null
+++ b/docstring_to_markdown/plain.py
@@ -0,0 +1,42 @@
+from re import fullmatch
+from .types import Converter
+from ._utils import escape_markdown
+
+
+def looks_like_plain_text(value: str) -> bool:
+    """Check if given string has plain text following English syntax without need for escaping.
+
+    Accepts:
+    - words made of letters and digits (any string containing an underscore is rejected)
+    - full stop, bangs and question marks at the end of a word if followed by a space or end of string
+    - commas, colons and semicolons if after a word and followed by a space
+    - dashes between words (like in `e-mail`)
+    - double and single quotes if preceded by a space and followed by a word, or if preceded by a word and followed by a space (or end of string); single quotes are also allowed in between two words
+    - parentheses if opening preceded by space and closing followed by space or end
+
+    Does not accept:
+    - square brackets (used in markdown a lot)
+    """
+    if '_' in value:
+        return False
+    return fullmatch(r"((\w[\.!\?\)'\"](\s|$))|(\w[,:;]\s)|(\w[-']\w)|(\w\s['\"\(])|\w|\s)+", value) is not None
+
+
+def plain_text_to_markdown(text: str) -> str:
+    """Return ``text`` passed through ``escape_markdown``."""
+    return escape_markdown(text)
+
+
+class PlainTextConverter(Converter):
+    """Converter for docstrings accepted by ``looks_like_plain_text``."""
+
+    priority = 50
+
+    def can_convert(self, docstring):
+        return looks_like_plain_text(docstring)
+
+    def convert(self, docstring):
+        return plain_text_to_markdown(docstring)
+
+
+__all__ = ['looks_like_plain_text', 'plain_text_to_markdown', 'PlainTextConverter']
diff --git a/docstring_to_markdown/rst.py b/docstring_to_markdown/rst.py index 174f9de..040b26d 100644 --- a/docstring_to_markdown/rst.py +++ b/docstring_to_markdown/rst.py @@ -1,13 +1,16 @@ from abc import ABC, abstractmethod from enum import IntEnum, auto +from textwrap import dedent from types import SimpleNamespace -from typing import Union, List, Dict +from typing import Callable, Match, Union, List, Dict import re
+from .types import Converter + class Directive: def __init__( - self, pattern: str, replacement: str, + self, pattern: str, replacement: Union[str, Callable[[Match], str]], name: Union[str, None] = None, flags: int = 0 ): @@ -249,7 +252,7 @@ def inline_markdown(self): ), Directive( pattern=r'`(?P