From 693b578b1ba84f663a0d3f4040a0c48ba06b4686 Mon Sep 17 00:00:00 2001 From: MFTabriz <39205857+MFTabriz@users.noreply.github.com> Date: Mon, 31 Jul 2023 13:16:08 +0200 Subject: [PATCH 01/36] add --version --- sphinxlint/__main__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 181f7872c..22f810be1 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -5,7 +5,7 @@ import sys from itertools import chain, starmap -from sphinxlint import check_file +from sphinxlint import check_file, __version__ from sphinxlint.checkers import all_checkers from sphinxlint.sphinxlint import CheckersOptions @@ -109,6 +109,12 @@ def __call__(self, parser, namespace, values, option_string=None): help="comma-separated list of fields used to sort errors by. Available " f"fields are: {SortField.as_supported_options()}", ) + parser.add_argument( + "--version", + action="version", + version=" ".join(["sphinxlint", __version__]), + help="print version", + ) parser.add_argument("paths", default=".", nargs="*") args = parser.parse_args(argv[1:]) From fec2aacac3a35a951c4b3d20050f9c36c80d2c13 Mon Sep 17 00:00:00 2001 From: Meisam <39205857+MFTabriz@users.noreply.github.com> Date: Mon, 31 Jul 2023 16:17:52 +0200 Subject: [PATCH 02/36] Update sphinxlint/__main__.py Co-authored-by: Hugo van Kemenade --- sphinxlint/__main__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 22f810be1..ef99a3a23 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -110,10 +110,7 @@ def __call__(self, parser, namespace, values, option_string=None): f"fields are: {SortField.as_supported_options()}", ) parser.add_argument( - "--version", - action="version", - version=" ".join(["sphinxlint", __version__]), - help="print version", + "-V", "--version", action="version", version=f"%(prog)s {__version__}" ) parser.add_argument("paths", 
default=".", nargs="*") From bc6b7180d3a655a79a7180e88adf0fa204c6f4b0 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 31 Jul 2023 17:26:50 +0300 Subject: [PATCH 03/36] Drop support for EOL Python 3.7 --- .github/workflows/tests.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 60192c703..38bd3b837 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12-dev"] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v3 diff --git a/pyproject.toml b/pyproject.toml index 55869ff23..4eac7c559 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Natural Language :: English", "Programming Language :: Python :: 3", ] -requires-python = ">= 3.7" +requires-python = ">= 3.8" dependencies = [ "regex", "polib", From d2139fb6edb8f8c7fc5f7a29b03a5e72d287ae49 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Thu, 3 Aug 2023 19:26:11 +0300 Subject: [PATCH 04/36] Replace 3.12-dev with 3.12 and 'allow-prereleases: true' --- .github/workflows/tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 38bd3b837..27d7e2512 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12-dev"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v3 @@ -21,6 +21,7 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + allow-prereleases: true cache: pip cache-dependency-path: 
.github/workflows/tests.yml - name: Upgrade pip From 3fd1552a8758c8aeee6428443da69d143106ea29 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Tue, 10 Oct 2023 23:50:31 +0800 Subject: [PATCH 05/36] Use pre-compiled regular expressions These speed up linting by potentially big factors (e.g., linting CPython/Docs/library/*.rst is ~25% faster locally). Signed-off-by: Rodrigo Tobar --- sphinxlint/utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index f15465545..a65b36b82 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -121,17 +121,21 @@ def looks_like_glued(match): return True +_START_OF_COMMENT_BLOCK_PATTERN = re.compile(r"^\s*\.\.$") +_PRODUCTION_LIST_DIRECTIVE_PATTERN = re.compile(r"^ *.. productionlist::") +_COMMENT_PATTERN = re.compile(r"^ *\.\. ") + def is_multiline_non_rst_block(line): """Returns True if the next lines are an indented literal block.""" - if re.match(r"^\s*\.\.$", line): # it's the start of a comment block. + if _START_OF_COMMENT_BLOCK_PATTERN.search(line): return True if rst.DIRECTIVES_CONTAINING_RST_RE.match(line): return False if rst.DIRECTIVES_CONTAINING_ARBITRARY_CONTENT_RE.match(line): return True - if re.match(r"^ *.. productionlist::", line): + if _PRODUCTION_LIST_DIRECTIVE_PATTERN.search(line): return True - if re.match(r"^ *\.\. 
", line) and type_of_explicit_markup(line) == "comment": + if _COMMENT_PATTERN.search(line) and type_of_explicit_markup(line) == "comment": return True if line.endswith("::\n"): # It's a literal block return True From da8c97192d7ac43e8eccbeb6185d172346e10b68 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Wed, 11 Oct 2023 01:06:06 +0800 Subject: [PATCH 06/36] Allow users to provide an explicit process count Instead of always defaulting to using as many processes as CPUs (or no sub-processes if checking fewer than 8 files), a new -j/--jobs flag allows users to customise how many sub-processes will be used to perform the checks. Defaults to the old behavior (i.e., os.cpu_count() processes), caps values on the lower end to 1 automatically, prevents subprocess spawning if requiring only one process, and aligns with the sphinx-build semantics. Signed-off-by: Rodrigo Tobar --- sphinxlint/__main__.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index ef99a3a23..59f6dbfa1 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -58,6 +58,14 @@ def __call__(self, parser, namespace, values, option_string=None): ) from None setattr(namespace, self.dest, sort_fields) + class StoreNumJobsAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, StoreNumJobsAction._job_count(values)) + def _job_count(values): + if values == "auto": + return os.cpu_count() + return max(int(values), 1) + parser.add_argument( "-v", "--verbose", @@ -109,6 +117,16 @@ def __call__(self, parser, namespace, values, option_string=None): help="comma-separated list of fields used to sort errors by. Available " f"fields are: {SortField.as_supported_options()}", ) + parser.add_argument( + "-j", + "--jobs", + metavar="N", + action=StoreNumJobsAction, + help="Run in parallle with N processes, defaults to \"auto\". 
" + "Special value \"auto\" will set N to cpu-count. " + "Values <= 1 are all considered 1.", + default=StoreNumJobsAction._job_count("auto") + ) parser.add_argument( "-V", "--version", action="version", version=f"%(prog)s {__version__}" ) @@ -209,10 +227,10 @@ def main(argv=None): for path in chain.from_iterable(walk(path, args.ignore) for path in args.paths) ] - if len(todo) < 8: + if args.jobs == 1 or len(todo) < 8: count = print_errors(sort_errors(starmap(check_file, todo), args.sort_by)) else: - with multiprocessing.Pool() as pool: + with multiprocessing.Pool(processes=args.jobs) as pool: count = print_errors( sort_errors(pool.imap_unordered(_check_file, todo), args.sort_by) ) From 00fab9254269039ffb7026f084945565b3462314 Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 10:19:43 +0800 Subject: [PATCH 07/36] Update sphinxlint/__main__.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- sphinxlint/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 59f6dbfa1..4240deaa2 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -122,8 +122,8 @@ def _job_count(values): "--jobs", metavar="N", action=StoreNumJobsAction, - help="Run in parallle with N processes, defaults to \"auto\". " - "Special value \"auto\" will set N to cpu-count. " + help="Run in parallle with N processes, defaults to 'auto', " + "which sets N to the number of logical CPUs." 
"Values <= 1 are all considered 1.", default=StoreNumJobsAction._job_count("auto") ) From 607e9916db773423a731e1579238367f59e3f77b Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 10:19:51 +0800 Subject: [PATCH 08/36] Update sphinxlint/__main__.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- sphinxlint/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 4240deaa2..7b0ccc88c 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -60,7 +60,7 @@ def __call__(self, parser, namespace, values, option_string=None): class StoreNumJobsAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, StoreNumJobsAction._job_count(values)) + setattr(namespace, self.dest, self._job_count(values)) def _job_count(values): if values == "auto": return os.cpu_count() From f2a255d4e535dc7f5a9315a9c2ac22544f0b71f4 Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 10:19:58 +0800 Subject: [PATCH 09/36] Update sphinxlint/__main__.py Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- sphinxlint/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 7b0ccc88c..4791aa985 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -61,6 +61,8 @@ def __call__(self, parser, namespace, values, option_string=None): class StoreNumJobsAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self._job_count(values)) + + @staticmethod def _job_count(values): if values == "auto": return os.cpu_count() From a7ae4ea99a93cc4c82b5459353bddd5121d66ffb Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 11 Oct 2023 10:32:22 +0200 Subject: [PATCH 10/36] Add caches to several functions in `utils.py` --- sphinxlint/utils.py | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index f15465545..20941928a 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -1,4 +1,6 @@ """Just a bunch of utility functions for sphinxlint.""" +from functools import lru_cache + import regex as re from polib import pofile @@ -27,6 +29,7 @@ def _clean_heuristic(paragraph, regex): paragraph = paragraph[: candidate.start()] + paragraph[candidate.end() :] +@lru_cache() def clean_paragraph(paragraph): """Removes all good constructs, so detectors can focus on bad ones. @@ -42,6 +45,7 @@ def clean_paragraph(paragraph): return paragraph.replace("\x00", "\\") +@lru_cache() def escape2null(text): r"""Return a string with escape-backslashes converted to nulls. @@ -174,6 +178,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): return output +@lru_cache() def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() From 7d63ef34ecc9d594692f6b39974a43e9b565414b Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 11 Oct 2023 10:49:58 +0200 Subject: [PATCH 11/36] A more complex cache for `hide_non_rst_blocks()` --- sphinxlint/utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 20941928a..7d2a5bec9 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -142,12 +142,22 @@ def is_multiline_non_rst_block(line): return False +_NON_RST_BLOCKS_CACHE = {} + + def hide_non_rst_blocks(lines, hidden_block_cb=None): """Filters out literal, comments, code blocks, ... The filter actually replace "removed" lines by empty lines, so the line numbering still make sense. + + This function is quite hot, so we cache the returned value where possible. + The function is only "pure" when hidden_block_cb is None, however, + so we can only safely cache the output when hidden_block_cb=None. 
""" + lines = tuple(lines) + if hidden_block_cb is None and lines in _NON_RST_BLOCKS_CACHE: + return _NON_RST_BLOCKS_CACHE[lines] in_literal = None excluded_lines = [] block_line_start = None @@ -175,6 +185,9 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): output.append(line) if excluded_lines and hidden_block_cb: hidden_block_cb(block_line_start, "".join(excluded_lines)) + output = tuple(output) + if hidden_block_cb is None: + _NON_RST_BLOCKS_CACHE[lines] = output return output From aad06d046580987ba817c7de0b24b6b4f35ae1f0 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 11 Oct 2023 11:11:28 +0200 Subject: [PATCH 12/36] Also do `paragraphs()` --- sphinxlint/sphinxlint.py | 2 +- sphinxlint/utils.py | 7 +++++-- tests/test_sphinxlint.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sphinxlint/sphinxlint.py b/sphinxlint/sphinxlint.py index d9b2e6a09..646e7294a 100644 --- a/sphinxlint/sphinxlint.py +++ b/sphinxlint/sphinxlint.py @@ -36,7 +36,7 @@ def check_text(filename, text, checkers, options=None): errors = [] ext = splitext(filename)[1] checkers = {checker for checker in checkers if ext in checker.suffixes} - lines = text.splitlines(keepends=True) + lines = tuple(text.splitlines(keepends=True)) if any(checker.rst_only for checker in checkers): lines_with_rst_only = hide_non_rst_blocks(lines) for check in checkers: diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 7d2a5bec9..885c29eb7 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -79,10 +79,12 @@ def escape2null(text): start = found + 2 # skip character after escape +@lru_cache() def paragraphs(lines): """Yield (paragraph_line_no, paragraph_text) pairs describing paragraphs of the given lines. 
""" + output = [] paragraph = [] paragraph_lno = 1 for lno, line in enumerate(lines, start=1): @@ -92,10 +94,11 @@ def paragraphs(lines): paragraph_lno = lno paragraph.append(line) elif paragraph: - yield paragraph_lno, "".join(paragraph) + output.append((paragraph_lno, "".join(paragraph))) paragraph = [] if paragraph: - yield paragraph_lno, "".join(paragraph) + output.append((paragraph_lno, "".join(paragraph))) + return tuple(output) def looks_like_glued(match): diff --git a/tests/test_sphinxlint.py b/tests/test_sphinxlint.py index 5739b3e7b..841b8e394 100644 --- a/tests/test_sphinxlint.py +++ b/tests/test_sphinxlint.py @@ -70,7 +70,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys): @pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")]) def test_paragraphs(file): with open(file) as f: - lines = f.readlines() + lines = tuple(f.readlines()) actual = paragraphs(lines) for lno, para in actual: firstpline = para.splitlines(keepends=True)[0] From 36f2075526967a9cd02726581704b3b566c169a3 Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 21:47:51 +0800 Subject: [PATCH 13/36] Update sphinxlint/__main__.py Co-authored-by: Hugo van Kemenade --- sphinxlint/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 4791aa985..2317a9a12 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -63,7 +63,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self._job_count(values)) @staticmethod - def _job_count(values): + def job_count(values): if values == "auto": return os.cpu_count() return max(int(values), 1) From 29ecd3fd7bba8ad3233b85e39783813595fad84b Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 21:47:58 +0800 Subject: [PATCH 14/36] Update sphinxlint/__main__.py Co-authored-by: Hugo van Kemenade --- sphinxlint/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 2317a9a12..ae6fed85b 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -127,7 +127,7 @@ def job_count(values): help="Run in parallle with N processes, defaults to 'auto', " "which sets N to the number of logical CPUs." "Values <= 1 are all considered 1.", - default=StoreNumJobsAction._job_count("auto") + default=StoreNumJobsAction.job_count("auto") ) parser.add_argument( "-V", "--version", action="version", version=f"%(prog)s {__version__}" From 9e06ae3e53d874479d521f9b363bfdcf0e795d69 Mon Sep 17 00:00:00 2001 From: rtobar Date: Wed, 11 Oct 2023 21:48:08 +0800 Subject: [PATCH 15/36] Update sphinxlint/__main__.py Co-authored-by: Hugo van Kemenade --- sphinxlint/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index ae6fed85b..5e60ae07a 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -124,7 +124,7 @@ def job_count(values): "--jobs", metavar="N", action=StoreNumJobsAction, - help="Run in parallle with N processes, defaults to 'auto', " + help="Run in parallel with N processes. Defaults to 'auto', " "which sets N to the number of logical CPUs." "Values <= 1 are all considered 1.", default=StoreNumJobsAction.job_count("auto") From 0867edc6a812b94ea58d348910a3ce774f292a12 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Wed, 11 Oct 2023 21:54:21 +0800 Subject: [PATCH 16/36] Reuse _COMMENT_PATTERN in hide_non_rst_blocks Signed-off-by: Rodrigo Tobar --- sphinxlint/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index a65b36b82..fe018da8c 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -125,6 +125,7 @@ def looks_like_glued(match): _PRODUCTION_LIST_DIRECTIVE_PATTERN = re.compile(r"^ *.. productionlist::") _COMMENT_PATTERN = re.compile(r"^ *\.\. 
") + def is_multiline_non_rst_block(line): """Returns True if the next lines are an indented literal block.""" if _START_OF_COMMENT_BLOCK_PATTERN.search(line): @@ -168,7 +169,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): block_line_start = lineno assert not excluded_lines if ( - re.match(r" *\.\. ", line) + _COMMENT_PATTERN.search(line) and type_of_explicit_markup(line) == "comment" ): line = "\n" From c6e81e42f27da89bcb5a551f558ec2293b986405 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Wed, 11 Oct 2023 22:05:08 +0800 Subject: [PATCH 17/36] Rename _PATTERN to _RE The latter seems to be what the current codebase calls Pattern objects, so let's not add confusion. Signed-off-by: Rodrigo Tobar --- sphinxlint/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index fe018da8c..61a2ec75c 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -121,22 +121,22 @@ def looks_like_glued(match): return True -_START_OF_COMMENT_BLOCK_PATTERN = re.compile(r"^\s*\.\.$") -_PRODUCTION_LIST_DIRECTIVE_PATTERN = re.compile(r"^ *.. productionlist::") -_COMMENT_PATTERN = re.compile(r"^ *\.\. ") +_START_OF_COMMENT_BLOCK_RE = re.compile(r"^\s*\.\.$") +_PRODUCTION_LIST_DIRECTIVE_RE = re.compile(r"^ *.. productionlist::") +_COMMENT_RE = re.compile(r"^ *\.\. 
") def is_multiline_non_rst_block(line): """Returns True if the next lines are an indented literal block.""" - if _START_OF_COMMENT_BLOCK_PATTERN.search(line): + if _START_OF_COMMENT_BLOCK_RE.search(line): return True if rst.DIRECTIVES_CONTAINING_RST_RE.match(line): return False if rst.DIRECTIVES_CONTAINING_ARBITRARY_CONTENT_RE.match(line): return True - if _PRODUCTION_LIST_DIRECTIVE_PATTERN.search(line): + if _PRODUCTION_LIST_DIRECTIVE_RE.search(line): return True - if _COMMENT_PATTERN.search(line) and type_of_explicit_markup(line) == "comment": + if _COMMENT_RE.search(line) and type_of_explicit_markup(line) == "comment": return True if line.endswith("::\n"): # It's a literal block return True @@ -169,7 +169,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): block_line_start = lineno assert not excluded_lines if ( - _COMMENT_PATTERN.search(line) + _COMMENT_RE.search(line) and type_of_explicit_markup(line) == "comment" ): line = "\n" From a0042882a8cde5c4dcb8e81656dd55fc6e5ec7c0 Mon Sep 17 00:00:00 2001 From: Rodrigo Tobar Date: Wed, 11 Oct 2023 21:59:50 +0800 Subject: [PATCH 18/36] Another pre-compiled RE for better perf Signed-off-by: Rodrigo Tobar --- sphinxlint/checkers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 5354606f6..4fe038c62 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -461,10 +461,12 @@ def check_block(block_lineno, block): yield from errors +_DANGLING_HYPHEN_RE = re.compile(r".*[a-z]-$") + @checker(".rst", rst_only=True) def check_dangling_hyphen(file, lines, options): """Check for lines ending in a hyphen.""" for lno, line in enumerate(lines): stripped_line = line.rstrip("\n") - if re.match(r".*[a-z]-$", stripped_line): + if _DANGLING_HYPHEN_RE.match(stripped_line): yield lno + 1, f"Line ends with dangling hyphen" From 0ba5b88e2e99c02a969b59381a2897b9f2a75dcd Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 11 Oct 2023 16:36:31 +0200 
Subject: [PATCH 19/36] Micro-optimise `check_missing_space_after_role()` --- sphinxlint/checkers.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 4fe038c62..f821caa88 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -157,6 +157,21 @@ def check_directive_missing_colons(file, lines, options=None): yield lno, "comment seems to be intended as a directive" +# The difficulty here is that the following is valid: +# The :literal:`:exc:`Exceptions`` +# While this is not: +# The :literal:`:exc:`Exceptions``s +_ROLE_BODY = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+" +_ALLOWED_AFTER_ROLE = ( + rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP + + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP + + r"|\s" +) +_SUSPICIOUS_ROLE = re.compile( + f":{rst.SIMPLENAME}:`{_ROLE_BODY}`[^{_ALLOWED_AFTER_ROLE}]" +) + + @checker(".rst", ".po") def check_missing_space_after_role(file, lines, options=None): r"""Search for roles immediately followed by a character. @@ -164,23 +179,9 @@ def check_missing_space_after_role(file, lines, options=None): Bad: :exc:`Exception`s. 
Good: :exc:`Exceptions`\ s """ - # The difficulty here is that the following is valid: - # The :literal:`:exc:`Exceptions`` - # While this is not: - # The :literal:`:exc:`Exceptions``s - role_body = rf"([^`]|\s`+|\\`|:{rst.SIMPLENAME}:`([^`]|\s`+|\\`)+`)+" - allowed_after_role = ( - rst.ASCII_ALLOWED_AFTER_INLINE_MARKUP - + rst.UNICODE_ALLOWED_AFTER_INLINE_MARKUP - + r"|\s" - ) - - suspicious_role = re.compile( - f":{rst.SIMPLENAME}:`{role_body}`[^{allowed_after_role}]" - ) for lno, line in enumerate(lines, start=1): line = clean_paragraph(line) - role = suspicious_role.search(line) + role = _SUSPICIOUS_ROLE.search(line) if role: yield lno, f"role missing (escaped) space after role: {role.group(0)!r}" From b9a37b3c4aba93f1ab8c14080655794e4aa403a4 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Wed, 11 Oct 2023 16:49:31 +0200 Subject: [PATCH 20/36] Add a cache to `rst.inline_markup_gen()` --- sphinxlint/rst.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index dcfded94b..10be3a502 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -4,6 +4,7 @@ - All constants are ALL_CAPS - All compiled regexes are suffixed by _RE """ +from functools import cache import regex as re @@ -151,6 +152,7 @@ UNICODE_ALLOWED_AFTER_INLINE_MARKUP = r"\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}" +@cache def inline_markup_gen(start_string, end_string, extra_allowed_before=""): """Generate a regex matching an inline markup. 
From 31408716916b8c82ed341ec2c970571f574da033 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 11 Oct 2023 16:51:53 +0200 Subject: [PATCH 21/36] py38 compat --- sphinxlint/rst.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index 10be3a502..0e8b7b473 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -4,7 +4,7 @@ - All constants are ALL_CAPS - All compiled regexes are suffixed by _RE """ -from functools import cache +from functools import lru_cache import regex as re @@ -152,7 +152,7 @@ UNICODE_ALLOWED_AFTER_INLINE_MARKUP = r"\p{Pe}\p{Pi}\p{Pf}\p{Pd}\p{Po}" -@cache +@lru_cache(maxsize=None) def inline_markup_gen(start_string, end_string, extra_allowed_before=""): """Generate a regex matching an inline markup. From 8df071effd50f9fa3068ee2eb7e040a3a9fcb997 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 12 Oct 2023 10:46:58 +0200 Subject: [PATCH 22/36] Pre-compile nearly all regex patterns --- sphinxlint/checkers.py | 42 ++++++++++++++++++++++++++++++------------ sphinxlint/rst.py | 3 +++ sphinxlint/utils.py | 26 +++++++++++++++++--------- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index f821caa88..e1e9c63dc 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -63,6 +63,9 @@ def check_missing_backtick_after_role(file, lines, options=None): yield paragraph_lno + error_offset, f"role missing closing backtick: {error.group(0)!r}" +_RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL) + + @checker(".rst", ".po") def check_missing_space_after_literal(file, lines, options=None): r"""Search for inline literals immediately followed by a character. @@ -74,8 +77,8 @@ def check_missing_space_after_literal(file, lines, options=None): if paragraph.count("|") > 4: return # we don't handle tables yet. 
paragraph = clean_paragraph(paragraph) - for role in re.finditer("``.+?``(?!`).", paragraph, flags=re.DOTALL): - if not re.match(rst.END_STRING_SUFFIX, role.group(0)[-1]): + for role in _RST_ROLE_RE.finditer(paragraph): + if not rst.END_STRING_SUFFIX_RE.match(role[0][-1]): error_offset = paragraph[: role.start()].count("\n") yield ( paragraph_lno + error_offset, @@ -84,6 +87,9 @@ def check_missing_space_after_literal(file, lines, options=None): ) +_LONE_DOUBLEBACKTICK_RE = re.compile("(? 4: return # we don't handle tables yet. paragraph = clean_paragraph(paragraph) - for lone_double_backtick in re.finditer("(?= 4 and "|" in match.group(0)): return # we don't handle tables yet. - if re.search(rst.ROLE_TAG + "$", before_match): + if rst.ROLE_TAG_STARTING_LINE_RE.search(before_match): # It's not a default role: it starts with a tag. continue - if re.search("^" + rst.ROLE_TAG, after_match): + if rst.ROLE_TAG_ENDING_LINE_RE.search(after_match): # It's not a default role: it ends with a tag. continue if match.group(0).startswith("``") and match.group(0).endswith("``"): @@ -274,7 +280,7 @@ def check_role_with_double_backticks(file, lines, options=None): if inline_literal is None: break before = paragraph[: inline_literal.start()] - if re.search(rst.ROLE_TAG + "$", before): + if rst.ROLE_TAG_ENDING_LINE_RE.search(before): error_offset = paragraph[: inline_literal.start()].count("\n") yield paragraph_lno + error_offset, "role use a single backtick, double backtick found." paragraph = ( @@ -325,6 +331,9 @@ def check_missing_space_before_default_role(file, lines, options=None): ) +_HYPERLINK_REFERENCE_RE = re.compile(r"\S* `_") + + @checker(".rst", ".po") def check_hyperlink_reference_missing_backtick(file, lines, options=None): """Search for missing backticks in front of hyperlink references. @@ -337,7 +346,7 @@ def check_hyperlink_reference_missing_backtick(file, lines, options=None): return # we don't handle tables yet. 
paragraph = clean_paragraph(paragraph) paragraph = rst.INTERPRETED_TEXT_RE.sub("", paragraph) - for hyperlink_reference in re.finditer(r"\S* `_", paragraph): + for hyperlink_reference in _HYPERLINK_REFERENCE_RE.finditer(paragraph): error_offset = paragraph[: hyperlink_reference.start()].count("\n") context = hyperlink_reference.group(0) yield ( @@ -391,6 +400,12 @@ def check_missing_final_newline(file, lines, options=None): yield len(lines), "No newline at end of file." +_is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match +_is_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match +_is_anonymous_hyperlink = re.compile(r"^\s*__ ").match +_is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match + + @checker(".rst", ".po", enabled=False, rst_only=True) def check_line_too_long(file, lines, options=None): """Check for line length; this checker is not run by default.""" @@ -399,13 +414,13 @@ def check_line_too_long(file, lines, options=None): if len(line) - 1 > options.max_line_length: if line.lstrip()[0] in "+|": continue # ignore wide tables - if re.match(r"^\s*\W*(:(\w+:)+)?`.*`\W*$", line): + if _is_long_interpreted_text(line): continue # ignore long interpreted text - if re.match(r"^\s*\.\. ", line): + if _is_directive_or_hyperlink(line): continue # ignore directives and hyperlink targets - if re.match(r"^\s*__ ", line): + if _is_anonymous_hyperlink(line): continue # ignore anonymous hyperlink targets - if re.match(r"^\s*``[^`]+``$", line): + if _is_very_long_string_literal(line): continue # ignore a very long literal string yield lno + 1, f"Line too long ({len(line)-1}/{options.max_line_length})" @@ -438,6 +453,9 @@ def check_triple_backticks(file, lines, options=None): yield lno + 1, "There's no rst syntax using triple backticks" +_BAD_DEDENT_RE = re.compile(" [^ ].*::$") + + @checker(".rst", ".po", rst_only=False) def check_bad_dedent(file, lines, options=None): """Check for mis-alignment in indentation in code blocks. 
@@ -455,7 +473,7 @@ def check_bad_dedent(file, lines, options=None): def check_block(block_lineno, block): for lineno, line in enumerate(block.splitlines()): - if re.match(" [^ ].*::$", line): + if _BAD_DEDENT_RE.match(line): errors.append((block_lineno + lineno, "Bad dedent in block")) list(hide_non_rst_blocks(lines, hidden_block_cb=check_block)) diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index 0e8b7b473..4f4f98fc7 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -144,6 +144,8 @@ BEFORE_ROLE = r"(^|(?<=[\s(/'{\[*-]))" ROLE_TAG = rf":{SIMPLENAME}:" +ROLE_TAG_ENDING_LINE_RE = re.compile(ROLE_TAG + "$") +ROLE_TAG_STARTING_LINE_RE = re.compile("^" + ROLE_TAG) ROLE_HEAD = rf"({BEFORE_ROLE}:{SIMPLENAME}:)" # A role, with a clean start ASCII_ALLOWED_BEFORE_INLINE_MARKUP = r"""-:/'"<(\[{""" @@ -250,6 +252,7 @@ def inline_markup_gen(start_string, end_string, extra_allowed_before=""): START_STRING_PREFIX = f"(^|(?<=\\s|[{OPENERS}{DELIMITERS}|]))" END_STRING_SUFFIX = f"($|(?=\\s|[\x00{CLOSING_DELIMITERS}{DELIMITERS}{CLOSERS}|]))" +END_STRING_SUFFIX_RE = re.compile(END_STRING_SUFFIX) # Find role glued with another word like: # the:c:func:`PyThreadState_LeaveTracing` function. 
diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 6d51b0e3f..b99b508b3 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -151,6 +151,7 @@ def is_multiline_non_rst_block(line): _NON_RST_BLOCKS_CACHE = {} +_ZERO_OR_MORE_SPACES_RE = re.compile(" *") def hide_non_rst_blocks(lines, hidden_block_cb=None): @@ -172,7 +173,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): output = [] for lineno, line in enumerate(lines, start=1): if in_literal is not None: - current_indentation = len(re.match(" *", line).group(0)) + current_indentation = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0]) if current_indentation > in_literal or line == "\n": excluded_lines.append(line if line == "\n" else line[in_literal:]) line = "\n" # Hiding line @@ -182,12 +183,12 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): hidden_block_cb(block_line_start, "".join(excluded_lines)) excluded_lines = [] if in_literal is None and is_multiline_non_rst_block(line): - in_literal = len(re.match(" *", line).group(0)) + in_literal = len(_ZERO_OR_MORE_SPACES_RE.match(line)[0]) block_line_start = lineno assert not excluded_lines if ( - _COMMENT_RE.search(line) - and type_of_explicit_markup(line) == "comment" + type_of_explicit_markup(line) == "comment" + and _COMMENT_RE.search(line) ): line = "\n" output.append(line) @@ -199,19 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): return output +_is_directive = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match +_is_footnote = re.compile(r"\.\. \[[0-9]+\] ").match +_is_citation = re.compile(r"\.\. \[[^\]]+\] ").match +_is_target = re.compile(r"\.\. _.*[^_]: ").match +_is_substitution = re.compile(r"\.\. \|[^\|]*\| ").match + + @lru_cache() def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() - if re.match(rf"\.\. {rst.ALL_DIRECTIVES}::", line): + if _is_directive(line): return "directive" - if re.match(r"\.\. 
\[[0-9]+\] ", line): + if _is_footnote(line): return "footnote" - if re.match(r"\.\. \[[^\]]+\] ", line): + if _is_citation(line): return "citation" - if re.match(r"\.\. _.*[^_]: ", line): + if _is_target(line): return "target" - if re.match(r"\.\. \|[^\|]*\| ", line): + if _is_substitution(line): return "substitution_definition" return "comment" From e558a6ff0798845460a796979d21bcdb32f62c64 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 12 Oct 2023 11:02:13 +0200 Subject: [PATCH 23/36] fix 'n' cleanup --- sphinxlint/checkers.py | 17 +++++++++-------- tests/test_sphinxlint.py | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index e1e9c63dc..6997461f6 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -127,12 +127,12 @@ def check_default_role(file, lines, options=None): if (stripped_line.startswith("|") and stripped_line.endswith("|") and stripped_line.count("|") >= 4 and "|" in match.group(0)): return # we don't handle tables yet. - if rst.ROLE_TAG_STARTING_LINE_RE.search(before_match): - # It's not a default role: it starts with a tag. - continue - if rst.ROLE_TAG_ENDING_LINE_RE.search(after_match): + if rst.ROLE_TAG_ENDING_LINE_RE.search(before_match): # It's not a default role: it ends with a tag. continue + if rst.ROLE_TAG_STARTING_LINE_RE.search(after_match): + # It's not a default role: it starts with a tag. + continue if match.group(0).startswith("``") and match.group(0).endswith("``"): # It's not a default role: it's an inline literal. 
continue @@ -453,7 +453,7 @@ def check_triple_backticks(file, lines, options=None): yield lno + 1, "There's no rst syntax using triple backticks" -_BAD_DEDENT_RE = re.compile(" [^ ].*::$") +_contains_bad_dedent = re.compile(" [^ ].*::$").match @checker(".rst", ".po", rst_only=False) @@ -473,19 +473,20 @@ def check_bad_dedent(file, lines, options=None): def check_block(block_lineno, block): for lineno, line in enumerate(block.splitlines()): - if _BAD_DEDENT_RE.match(line): + if _contains_bad_dedent(line): errors.append((block_lineno + lineno, "Bad dedent in block")) list(hide_non_rst_blocks(lines, hidden_block_cb=check_block)) yield from errors -_DANGLING_HYPHEN_RE = re.compile(r".*[a-z]-$") +_contains_dangling_hyphen = re.compile(r".*[a-z]-$").match + @checker(".rst", rst_only=True) def check_dangling_hyphen(file, lines, options): """Check for lines ending in a hyphen.""" for lno, line in enumerate(lines): stripped_line = line.rstrip("\n") - if _DANGLING_HYPHEN_RE.match(stripped_line): + if _contains_dangling_hyphen(stripped_line): yield lno + 1, f"Line ends with dangling hyphen" diff --git a/tests/test_sphinxlint.py b/tests/test_sphinxlint.py index 841b8e394..baddfd416 100644 --- a/tests/test_sphinxlint.py +++ b/tests/test_sphinxlint.py @@ -64,7 +64,7 @@ def test_sphinxlint_shall_not_pass(file, expected_errors, capsys): assert expected_error in out number_of_expected_errors = len(expected_errors) number_of_reported_errors = len(out.splitlines()) - assert number_of_expected_errors == number_of_reported_errors + assert number_of_expected_errors == number_of_reported_errors, f"{number_of_reported_errors=}, {out=}" @pytest.mark.parametrize("file", [str(FIXTURE_DIR / "paragraphs.rst")]) From c3d5c3c38b21f6d67b5a59ff566e7cf984254e22 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 12 Oct 2023 13:54:34 +0200 Subject: [PATCH 24/36] address review --- sphinxlint/checkers.py | 10 +++++++--- sphinxlint/rst.py | 2 -- 2 files changed, 7 insertions(+), 5 deletions(-) diff 
--git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 6997461f6..8b33c8812 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -109,6 +109,10 @@ def check_unbalanced_inline_literals_delimiters(file, lines, options=None): ) +_ends_with_role_tag = re.compile(rst.ROLE_TAG + "$").search +_starts_with_role_tag = re.compile("^" + rst.ROLE_TAG).search + + @checker(".rst", ".po", enabled=False) def check_default_role(file, lines, options=None): """Search for default roles (but they are allowed in many projects). @@ -127,10 +131,10 @@ def check_default_role(file, lines, options=None): if (stripped_line.startswith("|") and stripped_line.endswith("|") and stripped_line.count("|") >= 4 and "|" in match.group(0)): return # we don't handle tables yet. - if rst.ROLE_TAG_ENDING_LINE_RE.search(before_match): + if _ends_with_role_tag(before_match): # It's not a default role: it ends with a tag. continue - if rst.ROLE_TAG_STARTING_LINE_RE.search(after_match): + if _starts_with_role_tag(after_match): # It's not a default role: it starts with a tag. continue if match.group(0).startswith("``") and match.group(0).endswith("``"): @@ -280,7 +284,7 @@ def check_role_with_double_backticks(file, lines, options=None): if inline_literal is None: break before = paragraph[: inline_literal.start()] - if rst.ROLE_TAG_ENDING_LINE_RE.search(before): + if _ends_with_role_tag(before): error_offset = paragraph[: inline_literal.start()].count("\n") yield paragraph_lno + error_offset, "role use a single backtick, double backtick found." 
paragraph = ( diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index 4f4f98fc7..e33e44821 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -144,8 +144,6 @@ BEFORE_ROLE = r"(^|(?<=[\s(/'{\[*-]))" ROLE_TAG = rf":{SIMPLENAME}:" -ROLE_TAG_ENDING_LINE_RE = re.compile(ROLE_TAG + "$") -ROLE_TAG_STARTING_LINE_RE = re.compile("^" + ROLE_TAG) ROLE_HEAD = rf"({BEFORE_ROLE}:{SIMPLENAME}:)" # A role, with a clean start ASCII_ALLOWED_BEFORE_INLINE_MARKUP = r"""-:/'"<(\[{""" From 33d1564a9bda2c580e0de839c1719ca23835466c Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 12 Oct 2023 13:58:06 +0200 Subject: [PATCH 25/36] better names --- sphinxlint/utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index b99b508b3..3fcf2a6b3 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -200,26 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): return output -_is_directive = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match -_is_footnote = re.compile(r"\.\. \[[0-9]+\] ").match -_is_citation = re.compile(r"\.\. \[[^\]]+\] ").match -_is_target = re.compile(r"\.\. _.*[^_]: ").match -_is_substitution = re.compile(r"\.\. \|[^\|]*\| ").match +_contains_directive = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match +_contains_footnote = re.compile(r"\.\. \[[0-9]+\] ").match +_contains_citation = re.compile(r"\.\. \[[^\]]+\] ").match +_contains_target = re.compile(r"\.\. _.*[^_]: ").match +_contains_substitution = re.compile(r"\.\. 
\|[^\|]*\| ").match @lru_cache() def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() - if _is_directive(line): + if _contains_directive(line): return "directive" - if _is_footnote(line): + if _contains_footnote(line): return "footnote" - if _is_citation(line): + if _contains_citation(line): return "citation" - if _is_target(line): + if _contains_target(line): return "target" - if _is_substitution(line): + if _contains_substitution(line): return "substitution_definition" return "comment" From 1ebab9ffdb4dd3637867deac7be6d866738133b1 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Thu, 12 Oct 2023 14:14:58 +0200 Subject: [PATCH 26/36] more --- sphinxlint/checkers.py | 19 ++++++++++--------- sphinxlint/rst.py | 1 - sphinxlint/utils.py | 20 ++++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 8b33c8812..3a49be6ee 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -64,6 +64,7 @@ def check_missing_backtick_after_role(file, lines, options=None): _RST_ROLE_RE = re.compile("``.+?``(?!`).", flags=re.DOTALL) +_END_STRING_SUFFIX_RE = re.compile(rst.END_STRING_SUFFIX) @checker(".rst", ".po") @@ -78,7 +79,7 @@ def check_missing_space_after_literal(file, lines, options=None): return # we don't handle tables yet. paragraph = clean_paragraph(paragraph) for role in _RST_ROLE_RE.finditer(paragraph): - if not rst.END_STRING_SUFFIX_RE.match(role[0][-1]): + if not _END_STRING_SUFFIX_RE.match(role[0][-1]): error_offset = paragraph[: role.start()].count("\n") yield ( paragraph_lno + error_offset, @@ -405,8 +406,8 @@ def check_missing_final_newline(file, lines, options=None): _is_long_interpreted_text = re.compile(r"^\s*\W*(:(\w+:)+)?`.*`\W*$").match -_is_directive_or_hyperlink = re.compile(r"^\s*\.\. ").match -_is_anonymous_hyperlink = re.compile(r"^\s*__ ").match +_starts_with_directive_or_hyperlink = re.compile(r"^\s*\.\. 
").match +_starts_with_anonymous_hyperlink = re.compile(r"^\s*__ ").match _is_very_long_string_literal = re.compile(r"^\s*``[^`]+``$").match @@ -420,9 +421,9 @@ def check_line_too_long(file, lines, options=None): continue # ignore wide tables if _is_long_interpreted_text(line): continue # ignore long interpreted text - if _is_directive_or_hyperlink(line): + if _starts_with_directive_or_hyperlink(line): continue # ignore directives and hyperlink targets - if _is_anonymous_hyperlink(line): + if _starts_with_anonymous_hyperlink(line): continue # ignore anonymous hyperlink targets if _is_very_long_string_literal(line): continue # ignore a very long literal string @@ -457,7 +458,7 @@ def check_triple_backticks(file, lines, options=None): yield lno + 1, "There's no rst syntax using triple backticks" -_contains_bad_dedent = re.compile(" [^ ].*::$").match +_has_bad_dedent = re.compile(" [^ ].*::$").match @checker(".rst", ".po", rst_only=False) @@ -477,14 +478,14 @@ def check_bad_dedent(file, lines, options=None): def check_block(block_lineno, block): for lineno, line in enumerate(block.splitlines()): - if _contains_bad_dedent(line): + if _has_bad_dedent(line): errors.append((block_lineno + lineno, "Bad dedent in block")) list(hide_non_rst_blocks(lines, hidden_block_cb=check_block)) yield from errors -_contains_dangling_hyphen = re.compile(r".*[a-z]-$").match +_has_dangling_hyphen = re.compile(r".*[a-z]-$").match @checker(".rst", rst_only=True) @@ -492,5 +493,5 @@ def check_dangling_hyphen(file, lines, options): """Check for lines ending in a hyphen.""" for lno, line in enumerate(lines): stripped_line = line.rstrip("\n") - if _contains_dangling_hyphen(stripped_line): + if _has_dangling_hyphen(stripped_line): yield lno + 1, f"Line ends with dangling hyphen" diff --git a/sphinxlint/rst.py b/sphinxlint/rst.py index e33e44821..0e8b7b473 100644 --- a/sphinxlint/rst.py +++ b/sphinxlint/rst.py @@ -250,7 +250,6 @@ def inline_markup_gen(start_string, end_string, 
extra_allowed_before=""): START_STRING_PREFIX = f"(^|(?<=\\s|[{OPENERS}{DELIMITERS}|]))" END_STRING_SUFFIX = f"($|(?=\\s|[\x00{CLOSING_DELIMITERS}{DELIMITERS}{CLOSERS}|]))" -END_STRING_SUFFIX_RE = re.compile(END_STRING_SUFFIX) # Find role glued with another word like: # the:c:func:`PyThreadState_LeaveTracing` function. diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 3fcf2a6b3..c55d541f4 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -200,26 +200,26 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): return output -_contains_directive = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match -_contains_footnote = re.compile(r"\.\. \[[0-9]+\] ").match -_contains_citation = re.compile(r"\.\. \[[^\]]+\] ").match -_contains_target = re.compile(r"\.\. _.*[^_]: ").match -_contains_substitution = re.compile(r"\.\. \|[^\|]*\| ").match +_starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match +_starts_with_footnote_marker = re.compile(r"\.\. \[[0-9]+\] ").match +_starts_with_citation_marker = re.compile(r"\.\. \[[^\]]+\] ").match +_starts_with_target = re.compile(r"\.\. _.*[^_]: ").match +_starts_with_substitution_definition = re.compile(r"\.\. 
\|[^\|]*\| ").match @lru_cache() def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() - if _contains_directive(line): + if _starts_with_directive_marker(line): return "directive" - if _contains_footnote(line): + if _starts_with_footnote_marker(line): return "footnote" - if _contains_citation(line): + if _starts_with_citation_marker(line): return "citation" - if _contains_target(line): + if _starts_with_target(line): return "target" - if _contains_substitution(line): + if _starts_with_substitution_definition(line): return "substitution_definition" return "comment" From 61c449beb54e622c1eb1fca5585931221da9da46 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Thu, 12 Oct 2023 16:39:15 +0200 Subject: [PATCH 27/36] Apply suggestions from code review Co-authored-by: Hugo van Kemenade --- sphinxlint/checkers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinxlint/checkers.py b/sphinxlint/checkers.py index 3a49be6ee..791cac6ca 100644 --- a/sphinxlint/checkers.py +++ b/sphinxlint/checkers.py @@ -88,7 +88,7 @@ def check_missing_space_after_literal(file, lines, options=None): ) -_LONE_DOUBLEBACKTICK_RE = re.compile("(? 4: return # we don't handle tables yet. 
paragraph = clean_paragraph(paragraph) - for lone_double_backtick in _LONE_DOUBLEBACKTICK_RE.finditer(paragraph): + for lone_double_backtick in _LONE_DOUBLE_BACKTICK_RE.finditer(paragraph): error_offset = paragraph[: lone_double_backtick.start()].count("\n") yield ( paragraph_lno + error_offset, From a901d6a1bfa0fbbfb36f52444b624add7e861338 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 13 Oct 2023 00:20:27 +0200 Subject: [PATCH 28/36] Improve the caching strategy employed in `utils.py` --- sphinxlint/sphinxlint.py | 30 +++++++++++++++++------------- sphinxlint/utils.py | 17 +++++++++++++---- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/sphinxlint/sphinxlint.py b/sphinxlint/sphinxlint.py index 646e7294a..ec699dbf9 100644 --- a/sphinxlint/sphinxlint.py +++ b/sphinxlint/sphinxlint.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from os.path import splitext -from sphinxlint.utils import hide_non_rst_blocks, po2rst +from sphinxlint.utils import PER_FILE_CACHES, hide_non_rst_blocks, po2rst @dataclass(frozen=True) @@ -50,16 +50,20 @@ def check_text(filename, text, checkers, options=None): def check_file(filename, checkers, options: CheckersOptions = None): - ext = splitext(filename)[1] - if not any(ext in checker.suffixes for checker in checkers): - return Counter() try: - with open(filename, encoding="utf-8") as f: - text = f.read() - if filename.endswith(".po"): - text = po2rst(text) - except OSError as err: - return [f"{filename}: cannot open: {err}"] - except UnicodeDecodeError as err: - return [f"{filename}: cannot decode as UTF-8: {err}"] - return check_text(filename, text, checkers, options) + ext = splitext(filename)[1] + if not any(ext in checker.suffixes for checker in checkers): + return Counter() + try: + with open(filename, encoding="utf-8") as f: + text = f.read() + if filename.endswith(".po"): + text = po2rst(text) + except OSError as err: + return [f"{filename}: cannot open: {err}"] + except UnicodeDecodeError as err: + 
return [f"{filename}: cannot decode as UTF-8: {err}"] + return check_text(filename, text, checkers, options) + finally: + for memoized_function in PER_FILE_CACHES: + memoized_function.cache_clear() diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index c55d541f4..77f22cf03 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -7,6 +7,15 @@ from sphinxlint import rst +PER_FILE_CACHES = [] + + +def _per_file_cache(func): + memoized_func = lru_cache(maxsize=None)(func) + PER_FILE_CACHES.append(memoized_func) + return memoized_func + + def match_size(re_match): return re_match.end() - re_match.start() @@ -29,7 +38,7 @@ def _clean_heuristic(paragraph, regex): paragraph = paragraph[: candidate.start()] + paragraph[candidate.end() :] -@lru_cache() +@_per_file_cache def clean_paragraph(paragraph): """Removes all good constructs, so detectors can focus on bad ones. @@ -45,7 +54,7 @@ def clean_paragraph(paragraph): return paragraph.replace("\x00", "\\") -@lru_cache() +@_per_file_cache def escape2null(text): r"""Return a string with escape-backslashes converted to nulls. @@ -79,7 +88,7 @@ def escape2null(text): start = found + 2 # skip character after escape -@lru_cache() +@_per_file_cache def paragraphs(lines): """Yield (paragraph_line_no, paragraph_text) pairs describing paragraphs of the given lines. @@ -207,7 +216,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): _starts_with_substitution_definition = re.compile(r"\.\. 
\|[^\|]*\| ").match -@lru_cache() +@_per_file_cache def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() From e3515d92eb56b79b30e5a4dcfe159a3a7a4e5edd Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 13 Oct 2023 00:48:16 +0200 Subject: [PATCH 29/36] address review --- sphinxlint/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 77f22cf03..198e448d3 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -10,7 +10,7 @@ PER_FILE_CACHES = [] -def _per_file_cache(func): +def per_file_cache(func): memoized_func = lru_cache(maxsize=None)(func) PER_FILE_CACHES.append(memoized_func) return memoized_func @@ -38,7 +38,7 @@ def _clean_heuristic(paragraph, regex): paragraph = paragraph[: candidate.start()] + paragraph[candidate.end() :] -@_per_file_cache +@per_file_cache def clean_paragraph(paragraph): """Removes all good constructs, so detectors can focus on bad ones. @@ -54,7 +54,7 @@ def clean_paragraph(paragraph): return paragraph.replace("\x00", "\\") -@_per_file_cache +@per_file_cache def escape2null(text): r"""Return a string with escape-backslashes converted to nulls. @@ -88,7 +88,7 @@ def escape2null(text): start = found + 2 # skip character after escape -@_per_file_cache +@per_file_cache def paragraphs(lines): """Yield (paragraph_line_no, paragraph_text) pairs describing paragraphs of the given lines. @@ -216,7 +216,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): _starts_with_substitution_definition = re.compile(r"\.\. 
\|[^\|]*\| ").match -@_per_file_cache +@per_file_cache def type_of_explicit_markup(line): """Tell apart various explicit markup blocks.""" line = line.lstrip() From 715ffb232ad4eba0e9f491e8191cf9c9760cf1c2 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 13 Oct 2023 00:51:25 +0200 Subject: [PATCH 30/36] Revert "A more complex cache for `hide_non_rst_blocks()`" This reverts commit 7d63ef34ecc9d594692f6b39974a43e9b565414b. --- sphinxlint/utils.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/sphinxlint/utils.py b/sphinxlint/utils.py index 198e448d3..1dfb3ffec 100644 --- a/sphinxlint/utils.py +++ b/sphinxlint/utils.py @@ -159,7 +159,6 @@ def is_multiline_non_rst_block(line): return False -_NON_RST_BLOCKS_CACHE = {} _ZERO_OR_MORE_SPACES_RE = re.compile(" *") @@ -168,14 +167,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): The filter actually replace "removed" lines by empty lines, so the line numbering still make sense. - - This function is quite hot, so we cache the returned value where possible. - The function is only "pure" when hidden_block_cb is None, however, - so we can only safely cache the output when hidden_block_cb=None. """ - lines = tuple(lines) - if hidden_block_cb is None and lines in _NON_RST_BLOCKS_CACHE: - return _NON_RST_BLOCKS_CACHE[lines] in_literal = None excluded_lines = [] block_line_start = None @@ -203,10 +195,7 @@ def hide_non_rst_blocks(lines, hidden_block_cb=None): output.append(line) if excluded_lines and hidden_block_cb: hidden_block_cb(block_line_start, "".join(excluded_lines)) - output = tuple(output) - if hidden_block_cb is None: - _NON_RST_BLOCKS_CACHE[lines] = output - return output + return tuple(output) _starts_with_directive_marker = re.compile(rf"\.\. {rst.ALL_DIRECTIVES}::").match From 5ec032065fcef880b17ce65628f454cbab1e735d Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 13 Oct 2023 08:45:18 +0200 Subject: [PATCH 31/36] Update Python version used to test friend projects. 
--- .github/workflows/tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 27d7e2512..84a157e3b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,6 +13,8 @@ jobs: strategy: fail-fast: false matrix: + # when adding new versions, update the one used to test + # friend projects below to the latest one python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] os: [ubuntu-latest, macos-latest, windows-latest] steps: @@ -31,7 +33,7 @@ jobs: - name: Install sphinx-lint to pull dependencies run: python -m pip install -v . - name: Download more tests from friend projects - if: matrix.python-version == '3.11' && matrix.os == 'ubuntu-latest' + if: matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest' run: sh download-more-tests.sh - name: run tests run: python -m pytest From 946ac53b671ef86095a02543025a989c40b428be Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 13 Oct 2023 12:27:57 +0200 Subject: [PATCH 32/36] Add a deploy.yml that supports trusted publishing. --- .github/workflows/deploy.yml | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 000000000..ad847062b --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,73 @@ +name: Deploy + +on: + push: + branches: [main] + pull_request: + release: + types: + - published + workflow_dispatch: + +permissions: + contents: read + +jobs: + # Always build & lint package. + build-package: + name: Build & verify package + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: hynek/build-and-inspect-python-package@v1 + + # Upload to Test PyPI on every commit on main. 
+ release-test-pypi: + name: Publish in-dev package to test.pypi.org + if: | + github.repository_owner == 'sphinx-contrib' + && github.event_name == 'push' + && github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + needs: build-package + + permissions: + id-token: write + + steps: + - name: Download packages built by build-and-inspect-python-package + uses: actions/download-artifact@v3 + with: + name: Packages + path: dist + + - name: Upload package to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + + # Upload to real PyPI on GitHub Releases. + release-pypi: + name: Publish released package to pypi.org + if: | + github.repository_owner == 'sphinx-contrib' + && github.event.action == 'published' + runs-on: ubuntu-latest + needs: build-package + + permissions: + id-token: write + + steps: + - name: Download packages built by build-and-inspect-python-package + uses: actions/download-artifact@v3 + with: + name: Packages + path: dist + + - name: Upload package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From e47c7e1ca414103534c876725febeafff3e26698 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 13 Oct 2023 12:37:09 +0200 Subject: [PATCH 33/36] Switch from setuptools to hatchling in pyproject.toml. 
--- pyproject.toml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4eac7c559..ca6cbdb01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,9 @@ [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta" +build-backend = "hatchling.build" +requires = [ + "hatch-vcs", + "hatchling", +] [project] name = "sphinx-lint" @@ -33,10 +36,11 @@ repository = "https://github.com/sphinx-contrib/sphinx-lint" [project.scripts] sphinx-lint = "sphinxlint.__main__:main" -[tool.setuptools] -packages = ["sphinxlint"] -include-package-data = false -dynamic.version.attr = "sphinxlint.__version__" +[tool.hatch] +version.source = "vcs" + +[tool.hatch.version.raw-options] +local_scheme = "no-local-version" [tool.black] From 1c6d8f03c964eb65ab62790d56b11da5f53555a5 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 13 Oct 2023 12:40:21 +0200 Subject: [PATCH 34/36] Use importlib.metadata to get the version. --- sphinxlint/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinxlint/__init__.py b/sphinxlint/__init__.py index 28d5740cf..2d3c7f1bf 100644 --- a/sphinxlint/__init__.py +++ b/sphinxlint/__init__.py @@ -1,7 +1,9 @@ """Sphinx linter.""" -__version__ = "0.6.8" +import importlib.metadata from sphinxlint.sphinxlint import check_file, check_text +__version__ = importlib.metadata.version(__name__) + __all__ = ["check_text", "check_file"] From 8c4a652d62b35445b6afb5cb6a470e9613ba3d3d Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 13 Oct 2023 12:44:32 +0200 Subject: [PATCH 35/36] Hardcode package name. 
Co-authored-by: Hugo van Kemenade --- sphinxlint/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxlint/__init__.py b/sphinxlint/__init__.py index 2d3c7f1bf..91df07472 100644 --- a/sphinxlint/__init__.py +++ b/sphinxlint/__init__.py @@ -4,6 +4,6 @@ from sphinxlint.sphinxlint import check_file, check_text -__version__ = importlib.metadata.version(__name__) +__version__ = importlib.metadata.version("sphinx_lint") __all__ = ["check_text", "check_file"] From b61542c40acf353167156f7ea2217826892a3e57 Mon Sep 17 00:00:00 2001 From: rtobar Date: Fri, 13 Oct 2023 20:44:59 +0800 Subject: [PATCH 36/36] Fix for suggestion applied blindly during review (#86) Signed-off-by: Rodrigo Tobar --- sphinxlint/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxlint/__main__.py b/sphinxlint/__main__.py index 5e60ae07a..59ab56ca7 100644 --- a/sphinxlint/__main__.py +++ b/sphinxlint/__main__.py @@ -60,7 +60,7 @@ def __call__(self, parser, namespace, values, option_string=None): class StoreNumJobsAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, self._job_count(values)) + setattr(namespace, self.dest, self.job_count(values)) @staticmethod def job_count(values):