diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 000000000..ad847062b
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,73 @@
+name: Deploy
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  release:
+    types:
+      - published
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  # Always build & lint package.
+  build-package:
+    name: Build & verify package
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: hynek/build-and-inspect-python-package@v1
+
+  # Upload to Test PyPI on every commit on main.
+  release-test-pypi:
+    name: Publish in-dev package to test.pypi.org
+    if: |
+      github.repository_owner == 'sphinx-contrib'
+      && github.event_name == 'push'
+      && github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    needs: build-package
+
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Download packages built by build-and-inspect-python-package
+        uses: actions/download-artifact@v3
+        with:
+          name: Packages
+          path: dist
+
+      - name: Upload package to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+
+  # Upload to real PyPI on GitHub Releases.
+  release-pypi:
+    name: Publish released package to pypi.org
+    if: |
+      github.repository_owner == 'sphinx-contrib'
+      && github.event.action == 'published'
+    runs-on: ubuntu-latest
+    needs: build-package
+
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Download packages built by build-and-inspect-python-package
+        uses: actions/download-artifact@v3
+        with:
+          name: Packages
+          path: dist
+
+      - name: Upload package to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 60192c703..84a157e3b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,9 @@
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
+        # when adding new versions, update the one used to test
+        # friend projects below to the latest one
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         os: [ubuntu-latest, macos-latest, windows-latest]
     steps:
       - uses: actions/checkout@v3
@@ -21,6 +23,7 @@
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
+          allow-prereleases: true
          cache: pip
          cache-dependency-path: .github/workflows/tests.yml
      - name: Upgrade pip
@@ -30,7 +33,7 @@
      - name: Install sphinx-lint to pull dependencies
        run: python -m pip install -v .
      - name: Download more tests from friend projects
-        if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest'
+        if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'
        run: sh download-more-tests.sh
      - name: run tests
        run: python -m pytest
diff --git a/pyproject.toml b/pyproject.toml
index 55869ff23..ca6cbdb01 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,9 @@
 [build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
+build-backend = "hatchling.build"
+requires = [
+  "hatch-vcs",
+  "hatchling",
+]
 
 [project]
 name = "sphinx-lint"
@@ -19,7 +22,7 @@ classifiers = [
   "Natural Language :: English",
   "Programming Language :: Python :: 3",
 ]
-requires-python = ">= 3.7"
+requires-python = ">= 3.8"
 dependencies = [
   "regex",
   "polib",
@@ -33,10 +36,11 @@ repository = "https://github.com/sphinx-contrib/sphinx-lint"
 
 [project.scripts]
 sphinx-lint = "sphinxlint.__main__:main"
 
-[tool.setuptools]
-packages = ["sphinxlint"]
-include-package-data = false
-dynamic.version.attr = "sphinxlint.__version__"
+[tool.hatch]
+version.source = "vcs"
+
+[tool.hatch.version.raw-options]
+local_scheme = "no-local-version"
 
 [tool.black]
diff --git a/sphinxlint/__init__.py b/sphinxlint/__init__.py
index 28d5740cf..91df07472 100644
--- a/sphinxlint/__init__.py
+++ b/sphinxlint/__init__.py
@@ -1,7 +1,9 @@
 """Sphinx linter."""
 
-__version__ = "0.6.8"
+import importlib.metadata
 
 from sphinxlint.sphinxlint import check_file, check_text
 
+__version__ = importlib.metadata.version("sphinx_lint")
+
 __all__ = ["check_text", "check_file"]
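
Note on the versioning switch above: hatch-vcs derives the package version from the latest git tag at build time and writes it into the wheel's metadata, which `__init__.py` then reads back through `importlib.metadata` (the lookup resolves `sphinx_lint` to the `sphinx-lint` distribution). A minimal sketch of that pattern; the `_version()` helper and its fallback are illustrative, not part of this patch:

    import importlib.metadata

    def _version():
        try:
            return importlib.metadata.version("sphinx_lint")
        except importlib.metadata.PackageNotFoundError:
            # Source tree was never installed; there is no metadata to read.
            return "0+unknown"

One consequence to keep in mind: `__version__` now reports the installed version, not whatever tag the working tree happens to be on.
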
+ "Values <= 1 are all considered 1.", + default=StoreNumJobsAction.job_count("auto") + ) + parser.add_argument( + "-V", "--version", action="version", version=f"%(prog)s {__version__}" + ) parser.add_argument("paths", default=".", nargs="*") args = parser.parse_args(argv[1:]) @@ -206,10 +229,10 @@ def main(argv=None): for path in chain.from_iterable(walk(path, args.ignore) for path in args.paths) ] - if len(todo) < 8: + if args.jobs == 1 or len(todo) < 8: count = print_errors(sort_errors(starmap(check_file, todo), args.sort_by)) else: - with multiprocessing.Pool() as pool: + with multiprocessing.Pool(processes=args.jobs) as pool: count = print_errors( sort_errors(pool.imap_unordered(_check_file, todo), args.sort_by) ) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 5354606f6..791cac6ca 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -63,6 +63,10 @@ def check_missing_backtick_after_role(file, lines, options=None): yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}" +_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL) +_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX) + + @checker(".rst", ".po") def check_missing_space_after_literal(file, lines, options=None): r"""Search for inline literals immediately followed by a character. @@ -74,8 +78,8 @@ def check_missing_space_after_literal(file, lines, options=None): if paragraph.count("|") > 4: return # we don't handle tables yet. paragraph = clean_paragraph(paragraph) - for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL): - if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]): + for role in _RST_ROLE_RE.finditer(paragraph): + if not _END_STRING_SUFFIX_RE.match(role[0][-1]): error_offset = paragraph[: role.start()].count("\n") yield ( paragraph_lno + error_offset, @@ -84,6 +88,9 @@ def check_missing_space_after_literal(file, lines, options=None): ) +_LONE_DOUBLE_BACKTICK_RE = re.compile("(? 4: return # we don't handle tables yet. paragraph = clean_paragraph(paragraph) - for lone_double_backtick in re.finditer("(?= 4 and "|" in match.group(0)): return # we don't handle tables yet. - if re.search(rst.ROLE_TAG + "$", before_match): - # It's not a default role: it starts with a tag. - continue - if re.search("^" + rst.ROLE_TAG, after_match): + if _ends_with_role_tag(before_match): # It's not a default role: it ends with a tag. continue + if _starts_with_role_tag(after_match): + # It's not a default role: it starts with a tag. + continue if match.group(0).startswith("``") and match.group(0).endswith("``"): # It's not a default role: it's an inline literal. continue @@ -157,6 +168,21 @@ def check_directive_missing_colons(file, lines, options=None): yield lno, "comment seems to be intended as a directive" +# The difficulty here is that the following is valid: +# The :literal:`:exc:`Exceptions`` +# While this is not: +# The :literal:`:exc:`Exceptions``s +_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+" +_ALLOWED_AFTER_ROLE = ( + rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP + + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP + + r"|\s" +) +_SUSPICIOUS_ROLE = re.compile( + f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]" +) + + @checker(".rst", ".po") def check_missing_space_after_role(file, lines, options=None): r"""Search for roles immediately followed by a character. @@ -164,23 +190,9 @@ def check_missing_space_after_role(file, lines, options=None): Bad: :exc:`Exception`s. 
diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py
index 5354606f6..791cac6ca 100644
--- a/sphinxlint/checkers.py
+++ b/sphinxlint/checkers.py
@@ -63,6 +63,10 @@ def check_missing_backtick_after_role(file, lines, options=None):
         yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}"
 
 
+_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL)
+_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX)
+
+
 @checker(".rst", ".po")
 def check_missing_space_after_literal(file, lines, options=None):
     r"""Search for inline literals immediately followed by a character.
@@ -74,8 +78,8 @@ def check_missing_space_after_literal(file, lines, options=None):
         if paragraph.count("|") > 4:
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
-        for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL):
-            if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]):
+        for role in _RST_ROLE_RE.finditer(paragraph):
+            if not _END_STRING_SUFFIX_RE.match(role[0][-1]):
                 error_offset = paragraph[: role.start()].count("\n")
                 yield (
                     paragraph_lno + error_offset,
@@ -84,6 +88,9 @@ def check_missing_space_after_literal(file, lines, options=None):
                 )
 
 
+_LONE_DOUBLE_BACKTICK_RE = re.compile("(?<!`)``(?!`)")
+
+
 @checker(".rst", ".po")
 def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
     r"""Search for unbalanced inline literals delimiters.
@@ -94,8 +101,8 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
     for paragraph_lno, paragraph in paragraphs(lines):
         if paragraph.count("|") > 4:
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
-        for lone_double_backtick in re.finditer("(?<!`)``(?!`)", paragraph):
+        for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph):
             error_offset = paragraph[: lone_double_backtick.start()].count("\n")
             yield (
                 paragraph_lno + error_offset,
@@ -106,6 +113,10 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None):
             )
 
 
+_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search
+_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search
+
+
 @checker(".rst", ".po")
 def check_default_role(file, lines, options=None):
     """Search for default roles.
@@ -118,16 +129,16 @@ def check_default_role(file, lines, options=None):
         match = rst.INTERPRETED_TEXT_RE.search(line)
         if match is None:
             continue
         before_match = line[: match.start()]
         after_match = line[match.end() :]
         if (line.count("|") >= 4 and "|" in match.group(0)):
             return  # we don't handle tables yet.
-        if re.search(rst.ROLE_TAG + "$", before_match):
-            # It's not a default role: it starts with a tag.
-            continue
-        if re.search("^" + rst.ROLE_TAG, after_match):
+        if _ends_with_role_tag(before_match):
             # It's not a default role: it ends with a tag.
             continue
+        if _starts_with_role_tag(after_match):
+            # It's not a default role: it starts with a tag.
+            continue
         if match.group(0).startswith("``") and match.group(0).endswith("``"):
             # It's not a default role: it's an inline literal.
             continue
@@ -157,6 +168,21 @@ def check_directive_missing_colons(file, lines, options=None):
             yield lno, "comment seems to be intended as a directive"
 
 
+# The difficulty here is that the following is valid:
+#    The :literal:`:exc:`Exceptions``
+# While this is not:
+#    The :literal:`:exc:`Exceptions``s
+_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+"
+_ALLOWED_AFTER_ROLE = (
+    rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP
+    + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP
+    + r"|\s"
+)
+_SUSPICIOUS_ROLE = re.compile(
+    f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]"
+)
+
+
 @checker(".rst", ".po")
 def check_missing_space_after_role(file, lines, options=None):
     r"""Search for roles immediately followed by a character.
@@ -163,23 +189,9 @@ def check_missing_space_after_role(file, lines, options=None):
 
     Bad:  :exc:`Exception`s.
     Good: :exc:`Exceptions`\ s
     """
-    # The difficulty here is that the following is valid:
-    #    The :literal:`:exc:`Exceptions``
-    # While this is not:
-    #    The :literal:`:exc:`Exceptions``s
-    role_body = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+"
-    allowed_after_role = (
-        rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP
-        + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP
-        + r"|\s"
-    )
-
-    suspicious_role = re.compile(
-        f":{rst.SIMPLENAME}:`{role_body}`[^{allowed_after_role}]"
-    )
     for lno, line in enumerate(lines, start=1):
         line = clean_paragraph(line)
-        role = suspicious_role.search(line)
+        role = _SUSPICIOUS_ROLE.search(line)
         if role:
             yield lno, f"role missing (escaped) space after role: {role.group(0)!r}"
@@ -273,7 +285,7 @@ def check_role_with_double_backticks(file, lines, options=None):
             if inline_literal is None:
                 break
             before = paragraph[: inline_literal.start()]
-            if re.search(rst.ROLE_TAG + "$", before):
+            if _ends_with_role_tag(before):
                 error_offset = paragraph[: inline_literal.start()].count("\n")
                 yield paragraph_lno + error_offset, "role use a single backtick, double backtick found."
             paragraph = (
@@ -324,6 +336,9 @@ def check_missing_space_before_default_role(file, lines, options=None):
     )
 
 
+_HYPERLINK_REFERENCE_RE = re.compile(r"\S* `_")
+
+
 @checker(".rst", ".po")
 def check_hyperlink_reference_missing_backtick(file, lines, options=None):
     """Search for missing backticks in front of hyperlink references.
@@ -336,7 +351,7 @@ def check_hyperlink_reference_missing_backtick(file, lines, options=None):
             return  # we don't handle tables yet.
         paragraph = clean_paragraph(paragraph)
         paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph)
-        for hyperlink_reference in re.finditer(r"\S* `_", paragraph):
+        for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph):
             error_offset = paragraph[: hyperlink_reference.start()].count("\n")
             context = hyperlink_reference.group(0)
             yield (
@@ -390,6 +405,12 @@ def check_missing_final_newline(file, lines, options=None):
         yield len(lines), "No newline at end of file."
 
 
+_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match
+_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match
+_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match
+_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match
+
+
 @checker(".rst", ".po", enabled=False, rst_only=True)
 def check_line_too_long(file, lines, options=None):
     """Check for line length; this checker is not run by default."""
", line): + if _starts_with_directive_or_hyperlink(line): continue # ignore directives and hyperlink targets - if re.match(r"^\s*__ ", line): + if _starts_with_anonymous_hyperlink(line): continue # ignore anonymous hyperlink targets - if re.match(r"^\s*``[^`]+``$", line): + if _is_very_long_string_literal(line): continue # ignore a very long literal string yield lno + 1, f"Line too long ({len(line)-1}/{options.max_line_length})" @@ -437,6 +458,9 @@ def check_triple_backticks(file, lines, options=None): yield lno + 1, "There's no rst syntax using triple backticks" +_has_bad_dedent = re.compile(" [^ ].*::$").match + + @checker(".rst", ".po", rst_only=False) def check_bad_dedent(file, lines, options=None): """Check for mis-alignment in indentation in code blocks. @@ -454,17 +478,20 @@ def check_bad_dedent(file, lines, options=None): def check_block(block_lineno, block): for lineno, line in enumerate(block.splitlines()): - if re.match(" [^ ].*::$", line): + if _has_bad_dedent(line): errors.append((block_lineno + lineno, "Bad dedent in block")) list(hide_non_rst_blocks(lines, hidden_block_cb=check_block)) yield from errors +_has_dangling_hyphen = re.compile(r".*[a-z]-$").match + + @checker(".rst", rst_only=True) def check_dangling_hyphen(file, lines, options): """Check for lines ending in a hyphen.""" for lno, line in enumerate(lines): stripped_line = line.rstrip("\n") - if re.match(r".*[a-z]-$", stripped_line): + if _has_dangling_hyphen(stripped_line): yield lno + 1, f"Line ends with dangling hyphen" diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index dcfded94b..0e8b7b473 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -4,6 +4,7 @@ - All constants are ALL_CAPS - All compiled regexes are suffixed by _RE """ +from functools import lru_cache import regex as re @@ -151,6 +152,7 @@ UNICODE_ALLOWED_AFTER_INLINE_MARKUP = r"\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}" +@lru_cache(maxsize=None) def inline_markup_gen(start_string, end_string, extra_allowed_before=""): """Generate a regex matching an inline markup. 
diff --git a/sphinxlint/sphinxlint.py b/sphinxlint/sphinxlint.py
index d9b2e6a09..ec699dbf9 100644
--- a/sphinxlint/sphinxlint.py
+++ b/sphinxlint/sphinxlint.py
@@ -2,7 +2,7 @@
 from dataclasses import dataclass
 from os.path import splitext
 
-from sphinxlint.utils import hide_non_rst_blocks, po2rst
+from sphinxlint.utils import PER_FILE_CACHES, hide_non_rst_blocks, po2rst
 
 
 @dataclass(frozen=True)
@@ -36,7 +36,7 @@ def check_text(filename, text, checkers, options=None):
     errors = []
     ext = splitext(filename)[1]
     checkers = {checker for checker in checkers if ext in checker.suffixes}
-    lines = text.splitlines(keepends=True)
+    lines = tuple(text.splitlines(keepends=True))
     if any(checker.rst_only for checker in checkers):
         lines_with_rst_only = hide_non_rst_blocks(lines)
     for check in checkers:
@@ -50,16 +50,20 @@ def check_text(filename, text, checkers, options=None):
 
 
 def check_file(filename, checkers, options: CheckersOptions = None):
-    ext = splitext(filename)[1]
-    if not any(ext in checker.suffixes for checker in checkers):
-        return Counter()
     try:
-        with open(filename, encoding="utf-8") as f:
-            text = f.read()
-        if filename.endswith(".po"):
-            text = po2rst(text)
-    except OSError as err:
-        return [f"{filename}: cannot open: {err}"]
-    except UnicodeDecodeError as err:
-        return [f"{filename}: cannot decode as UTF-8: {err}"]
-    return check_text(filename, text, checkers, options)
+        ext = splitext(filename)[1]
+        if not any(ext in checker.suffixes for checker in checkers):
+            return Counter()
+        try:
+            with open(filename, encoding="utf-8") as f:
+                text = f.read()
+            if filename.endswith(".po"):
+                text = po2rst(text)
+        except OSError as err:
+            return [f"{filename}: cannot open: {err}"]
+        except UnicodeDecodeError as err:
+            return [f"{filename}: cannot decode as UTF-8: {err}"]
+        return check_text(filename, text, checkers, options)
+    finally:
+        for memoized_function in PER_FILE_CACHES:
+            memoized_function.cache_clear()
""" + output = [] paragraph = [] paragraph_lno = 1 for lno, line in enumerate(lines, start=1): @@ -88,10 +103,11 @@ def paragraphs(lines): paragraph_lno = lno paragraph.append(line) elif paragraph: - yield paragraph_lno, "".join(paragraph) + output.append((paragraph_lno, "".join(paragraph))) paragraph = [] if paragraph: - yield paragraph_lno, "".join(paragraph) + output.append((paragraph_lno, "".join(paragraph))) + return tuple(output) def looks_like_glued(match): @@ -121,23 +137,31 @@ def looks_like_glued(match): return True +_START_OF_COMMENT_BLOCK_RE = re.compile(r"^\s*\.\.$") +_PRODUCTION_LIST_DIRECTIVE_RE = re.compile(r"^ *.. productionlist::") +_COMMENT_RE = re.compile(r"^ *\.\. ") + + def is_multiline_non_rst_block(line): """Returns True if the next lines are an indented literal block.""" - if re.match(r"^\s*\.\.$", line): # it's the start of a comment block. + if _START_OF_COMMENT_BLOCK_RE.search(line): return True if rst.DIRECTIVES_CONTAINING_RST_RE.match(line): return False if rst.DIRECTIVES_CONTAINING_ARBITRARY_CONTENT_RE.match(line): return True - if re.match(r"^ *.. productionlist::", line): + if _PRODUCTION_LIST_DIRECTIVE_RE.search(line): return True - if re.match(r"^ *\.\. ", line) and type_of_explicit_markup(line) == "comment": + if _COMMENT_RE.search(line) and type_of_explicit_markup(line) == "comment": return True if line.endswith("::\n"): # It's a literal block return True return False +_ZERO_OR_MORE_SPACES_RE = re.compile(" *") + + def hide_non_rst_blocks(lines, hidden_block_cb=None): """Filters out literal, comments, code blocks, ... @@ -150,7 +174,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): output = [] for lineno, line in enumerate(lines, start=1): if in_literal is not None: - current_indentation = len(re.match(" *", line).group(0)) + current_indentation = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0]) if current_indentation > in_literal or line == "\n": excluded_lines.append(line if line == "\n" else line[in_literal:]) line = "\n" # Hiding line @@ -160,32 +184,40 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): hidden_block_cb(block_line_start, "".join(excluded_lines)) excluded_lines = [] if in_literal is None and is_multiline_non_rst_block(line): - in_literal = len(re.match(" *", line).group(0)) + in_literal = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0]) block_line_start = lineno assert not excluded_lines if ( - re.match(r" *\.\. ", line) - and type_of_explicit_markup(line) == "comment" + type_of_explicit_markup(line) == "comment" + and _COMMENT_RE.search(line) ): line = "\n" output.append(line) if excluded_lines and hidden_block_cb: hidden_block_cb(block_line_start, "".join(excluded_lines)) - return output + return tuple(output) + + +_starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match +_starts_with_footnote_marker = re.compile(r"\.\. \[[0-9]+\] ").match +_starts_with_citation_marker = re.compile(r"\.\. \[[^\]]+\] ").match +_starts_with_target = re.compile(r"\.\. _.*[^_]: ").match +_starts_with_substitution_definition = re.compile(r"\.\. \|[^\|]*\| ").match +@per_file_cache def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() - if re.match(rf"\.\. {rst.ALL_DIRECTIVES}::", line): + if _starts_with_directive_marker(line): return "directive" - if re.match(r"\.\. \[[0-9]+\] ", line): + if _starts_with_footnote_marker(line): return "footnote" - if re.match(r"\.\. \[[^\]]+\] ", line): + if _starts_with_citation_marker(line): return "citation" - if re.match(r"\.\. 
_.*[^_]: ", line): + if _starts_with_target(line): return "target" - if re.match(r"\.\. \|[^\|]*\| ", line): + if _starts_with_substitution_definition(line): return "substitution_definition" return "comment" diff --git a/tests/test_sphinxlint.py b/tests/test_sphinxlint.py index 5739b3e7b..baddfd416 100644 --- a/tests/test_sphinxlint.py +++ b/tests/test_sphinxlint.py @@ -64,13 +64,13 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys): assert expected_error in out number_of_expected_errors = len(expected_errors) number_of_reported_errors = len(out.splitlines()) - assert number_of_expected_errors == number_of_reported_errors + assert number_of_expected_errors == number_of_reported_errors, f"{number_of_reported_errors=}, {out=}" @pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")]) def test_paragraphs(file): with open(file) as f: - lines = f.readlines() + lines = tuple(f.readlines()) actual = paragraphs(lines) for lno, para in actual: firstpline = para.splitlines(keepends=True)[0]