From 3497b2ba30ba252b934714c92b2d4ec2c67e7815 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 6 Jan 2023 14:24:12 +0100 Subject: [PATCH 01/20] Drop unused version.sphinxopts --- build_docs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build_docs.py b/build_docs.py index 8e8866f..0d0ed1f 100755 --- a/build_docs.py +++ b/build_docs.py @@ -80,7 +80,6 @@ def __init__( status, branch=None, tag=None, - sphinxopts=(), ): if status not in self.STATUSES: raise ValueError( @@ -93,7 +92,6 @@ def __init__( raise ValueError("Please build a version with at least a branch or a tag.") self.branch_or_tag = branch or tag self.status = status - self.sphinxopts = list(sphinxopts) def __repr__(self): return f"Version({self.name})" @@ -754,7 +752,7 @@ def build(self): self.version.name, self.language.tag, ) - sphinxopts = list(self.language.sphinxopts) + list(self.version.sphinxopts) + sphinxopts = list(self.language.sphinxopts) sphinxopts.extend(["-q"]) if self.language.tag != "en": locale_dirs = self.build_root / self.version.name / "locale" From 2b53a17b167ecefe190a1ddde03cf05be9095c72 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 6 Jan 2023 16:15:48 +0100 Subject: [PATCH 02/20] Use a config file for easier discoverability. It was previously "hidden" deep in the py file. --- build_docs.py | 160 ++++++++++++++++++++++++++++---------------------- config.ini | 92 +++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 70 deletions(-) create mode 100644 config.ini diff --git a/build_docs.py b/build_docs.py index 0d0ed1f..28c26e9 100755 --- a/build_docs.py +++ b/build_docs.py @@ -2,8 +2,8 @@ """Build the Python docs for various branches and various languages. -Without any arguments builds docs for all active versions configured in the -global VERSIONS list and all languages configured in the LANGUAGES list. +Without any arguments builds docs for all active versions and +languages configured in the config.ini file. 
-q selects "quick build", which means to build only HTML. @@ -20,6 +20,7 @@ """ from argparse import ArgumentParser +import configparser from contextlib import suppress from dataclasses import dataclass import filecmp @@ -41,6 +42,7 @@ from pathlib import Path from string import Template from textwrap import indent +from typing import Iterable import zc.lockfile import jinja2 @@ -154,14 +156,14 @@ def filter(versions, branch=None): return [v for v in versions if v.status not in ("EOL", "security-fixes")] @staticmethod - def current_stable(): + def current_stable(versions): """Find the current stable cPython version.""" - return max([v for v in VERSIONS if v.status == "stable"], key=Version.as_tuple) + return max([v for v in versions if v.status == "stable"], key=Version.as_tuple) @staticmethod - def current_dev(): + def current_dev(versions): """Find the current de cPython version.""" - return max(VERSIONS, key=Version.as_tuple) + return max(versions, key=Version.as_tuple) @property def picker_label(self): @@ -172,7 +174,7 @@ def picker_label(self): return f"pre ({self.name})" return self.name - def setup_indexsidebar(self, dest_path): + def setup_indexsidebar(self, versions, dest_path): """Build indexsidebar.html for Sphinx.""" with open( HERE / "templates" / "indexsidebar.html", encoding="UTF-8" @@ -183,7 +185,7 @@ def setup_indexsidebar(self, dest_path): sidebar_template.render( current_version=self, versions=sorted( - VERSIONS, key=lambda v: version_to_tuple(v.name), reverse=True + versions, key=lambda v: version_to_tuple(v.name), reverse=True ), ) ) @@ -195,7 +197,6 @@ def __gt__(self, other): return self.as_tuple() > other.as_tuple() - @dataclass(frozen=True, order=True) class Language: tag: str @@ -206,23 +207,6 @@ class Language: html_only: bool = False -# EOL and security-fixes are not automatically built, no need to remove them -# from the list, this way we can still rebuild them manually as needed. 
-# -# Please keep the list in reverse-order for ease of editing. -VERSIONS = [ - Version("3.13", branch="origin/main", status="in development"), - Version("3.12", branch="origin/3.12", status="stable"), - Version("3.11", branch="origin/3.11", status="stable"), - Version("3.10", branch="origin/3.10", status="security-fixes"), - Version("3.9", branch="origin/3.9", status="security-fixes"), - Version("3.8", branch="origin/3.8", status="security-fixes"), - Version("3.7", tag="3.7", status="EOL"), - Version("3.6", tag="3.6", status="EOL"), - Version("3.5", tag="3.5", status="EOL"), - Version("2.7", tag="2.7", status="EOL"), -] - XELATEX_DEFAULT = ( "-D latex_engine=xelatex", "-D latex_elements.inputenc=", @@ -285,22 +269,6 @@ class Language: r"-D latex_elements.fontenc=\\usepackage{xeCJK}", ) -LANGUAGES = { - Language("en", "en", "English", True, XELATEX_DEFAULT), - Language("es", "es", "Spanish", True, XELATEX_WITH_FONTSPEC), - Language("fr", "fr", "French", True, XELATEX_WITH_FONTSPEC), - Language("id", "id", "Indonesian", False, XELATEX_DEFAULT), - Language("it", "it", "Italian", False, XELATEX_DEFAULT), - Language("ja", "ja", "Japanese", True, LUALATEX_FOR_JP), - Language("ko", "ko", "Korean", True, XELATEX_FOR_KOREAN), - Language("pl", "pl", "Polish", False, XELATEX_DEFAULT), - Language("pt-br", "pt_BR", "Brazilian Portuguese", True, XELATEX_DEFAULT), - Language("tr", "tr", "Turkish", True, XELATEX_DEFAULT), - Language("uk", "uk", "Ukrainian", False, XELATEX_DEFAULT, html_only=True), - Language("zh-cn", "zh_CN", "Simplified Chinese", True, XELATEX_WITH_CJK), - Language("zh-tw", "zh_TW", "Traditional Chinese", True, XELATEX_WITH_CJK), -} - def run(cmd, cwd=None) -> subprocess.CompletedProcess: """Like subprocess.run, with logging before and after the command execution.""" @@ -446,7 +414,9 @@ def edit(file: Path): temporary.rename(file) -def setup_switchers(html_root: Path): +def setup_switchers( + versions: Iterable[Version], languages: Iterable[Language], 
html_root: Path +): """Setup cross-links between cpython versions: - Cross-link various languages in a language switcher - Cross-link various versions in a version switcher @@ -464,7 +434,7 @@ def setup_switchers(html_root: Path): sorted( [ (language.tag, language.name) - for language in LANGUAGES + for language in languages if language.in_prod ] ) @@ -475,7 +445,7 @@ def setup_switchers(html_root: Path): [ (version.name, version.picker_label) for version in sorted( - VERSIONS, + versions, key=lambda v: version_to_tuple(v.name), reverse=True, ) @@ -499,7 +469,13 @@ def setup_switchers(html_root: Path): ofile.write(line) -def build_robots_txt(www_root: Path, group, skip_cache_invalidation): +def build_robots_txt( + versions: Iterable[Version], + languages: Iterable[Language], + www_root: Path, + group, + skip_cache_invalidation, +): """Disallow crawl of EOL versions in robots.txt.""" if not www_root.exists(): logging.info("Skipping robots.txt generation (www root does not even exists).") @@ -509,7 +485,7 @@ def build_robots_txt(www_root: Path, group, skip_cache_invalidation): template = jinja2.Template(template_file.read()) with open(robots_file, "w", encoding="UTF-8") as robots_txt_file: robots_txt_file.write( - template.render(languages=LANGUAGES, versions=VERSIONS) + "\n" + template.render(languages=languages, versions=versions) + "\n" ) robots_file.chmod(0o775) run(["chgrp", group, robots_file]) @@ -517,7 +493,9 @@ def build_robots_txt(www_root: Path, group, skip_cache_invalidation): requests.request("PURGE", "https://docs.python.org/robots.txt") -def build_sitemap(www_root: Path, group): +def build_sitemap( + versions: Iterable[Version], languages: Iterable[Language], www_root: Path, group +): """Build a sitemap with all live versions and translations.""" if not www_root.exists(): logging.info("Skipping sitemap generation (www root does not even exists).") @@ -526,7 +504,7 @@ def build_sitemap(www_root: Path, group): template = 
jinja2.Template(template_file.read()) sitemap_file = www_root / "sitemap.xml" sitemap_file.write_text( - template.render(languages=LANGUAGES, versions=VERSIONS) + "\n", encoding="UTF-8" + template.render(languages=languages, versions=versions) + "\n", encoding="UTF-8" ) sitemap_file.chmod(0o664) run(["chgrp", group, sitemap_file]) @@ -631,8 +609,9 @@ def parse_args(): parser.add_argument( "--languages", nargs="*", - default={language.tag for language in LANGUAGES}, - help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'.", + default="all", + help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'. " + "Use 'all' to build all of them (it's the default behavior).", metavar="fr", ) parser.add_argument( @@ -677,7 +656,9 @@ class DocBuilder: """Builder for a cpython version and a language.""" version: Version + versions: Iterable[Version] language: Language + languages: Iterable[Language] build_root: Path www_root: Path quick: bool @@ -783,7 +764,7 @@ def build(self): if self.version.status in ("in development", "pre-release") else "stable" ) - + ("" if self.full_build else "-html") + + ("" if self.full_build else "-html") ) logging.info("Running make %s", maketarget) python = self.venv / "bin" / "python" @@ -799,7 +780,8 @@ def build(self): ] ) self.version.setup_indexsidebar( - self.checkout / "Doc" / "tools" / "templates" / "indexsidebar.html" + self.versions, + self.checkout / "Doc" / "tools" / "templates" / "indexsidebar.html", ) run( [ @@ -817,7 +799,9 @@ def build(self): ) run(["mkdir", "-p", self.log_directory]) run(["chgrp", "-R", self.group, self.log_directory]) - setup_switchers(self.checkout / "Doc" / "build" / "html") + setup_switchers( + self.versions, self.languages, self.checkout / "Doc" / "build" / "html" + ) logging.info( "Build done for version: %s, language: %s", self.version.name, @@ -990,7 +974,9 @@ def symlink(www_root: Path, language: Language, directory: str, name: str, group purge_path(www_root, 
link) -def major_symlinks(www_root: Path, group): +def major_symlinks( + www_root: Path, group, versions: Iterable[Version], languages: Iterable[Language] +): """Maintains the /2/ and /3/ symlinks for each languages. Like: @@ -998,13 +984,13 @@ def major_symlinks(www_root: Path, group): - /fr/3/ → /fr/3.9/ - /es/3/ → /es/3.9/ """ - current_stable = Version.current_stable().name - for language in LANGUAGES: + current_stable = Version.current_stable(versions).name + for language in languages: symlink(www_root, language, current_stable, "3", group) symlink(www_root, language, "2.7", "2", group) -def dev_symlink(www_root: Path, group): +def dev_symlink(www_root: Path, group, versions, languages): """Maintains the /dev/ symlinks for each languages. Like: @@ -1012,8 +998,8 @@ def dev_symlink(www_root: Path, group): - /fr/dev/ → /fr/3.11/ - /es/dev/ → /es/3.11/ """ - current_dev = Version.current_dev().name - for language in LANGUAGES: + current_dev = Version.current_dev(versions).name + for language in languages: symlink(www_root, language, current_dev, "dev", group) @@ -1051,14 +1037,46 @@ def purge_path(www_root: Path, path: Path): run(["curl", "-XPURGE", f"https://docs.python.org/{{{','.join(to_purge)}}}"]) +def parse_config(): + config = configparser.ConfigParser() + config.read(HERE / "config.ini") + versions, languages = [], [] + for name, section in config.items(): + if section.get("status"): # It's a version + versions.append( + Version( + name, + status=section["status"], + branch=section.get("branch"), + tag=section.get("tag"), + ) + ) + if section.get("name"): # It's a language + languages.append( + Language( + name, + section.get("iso639_tag", name), + section["name"], + section.getboolean("in_prod", True), + sphinxopts=globals()[section.get("sphinxopts", "XELATEX_DEFAULT")], + html_only=section.get("html_only", False), + ) + ) + return versions, languages + + def build_docs(args) -> bool: """Build all docs (each languages and each versions).""" - 
languages_dict = {language.tag: language for language in LANGUAGES} - versions = Version.filter(VERSIONS, args.branch) - languages = [languages_dict[tag] for tag in args.languages] - del args.languages + versions, languages = parse_config() + languages_dict = {language.tag: language for language in languages} + todo = list( + product( + Version.filter(versions, args.branch), + [languages_dict[tag] for tag in args.languages], + ) + ) del args.branch - todo = list(product(versions, languages)) + del args.languages all_built_successfully = True while todo: version, language = todo.pop() @@ -1066,13 +1084,15 @@ def build_docs(args) -> bool: with sentry_sdk.configure_scope() as scope: scope.set_tag("version", version.name) scope.set_tag("language", language.tag) - builder = DocBuilder(version, language, **vars(args)) + builder = DocBuilder(version, versions, language, languages, **vars(args)) all_built_successfully &= builder.run() - build_sitemap(args.www_root, args.group) + build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) - build_robots_txt(args.www_root, args.group, args.skip_cache_invalidation) - major_symlinks(args.www_root, args.group) - dev_symlink(args.www_root, args.group) + build_robots_txt( + versions, languages, args.www_root, args.group, args.skip_cache_invalidation + ) + major_symlinks(args.www_root, args.group, versions, languages) + dev_symlink(args.www_root, args.group, versions, languages) proofread_canonicals(args.www_root, args.skip_cache_invalidation) return all_built_successfully diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..eba1d88 --- /dev/null +++ b/config.ini @@ -0,0 +1,92 @@ +# EOL and security-fixes are not automatically built, no need to remove them +# from the list, this way we can still easily rebuild them manually as needed. +# +# Please keep the list in reverse-order for ease of editing. 
+ +[3.12] +branch = origin/main +status = in development + +[3.11] +branch = origin/3.11 +status = stable + +[3.10] +branch = origin/3.10 +status = stable + +[3.9] +branch = origin/3.9 +status = security-fixes + +[3.8] +branch = origin/3.8 +status = security-fixes + +[3.7] +branch = origin/3.7 +status = security-fixes + +[3.6] +tag = 3.6 +status = EOL + +[3.5] +tag = 3.5 +status = EOL + +[2.7] +tag = 2.7 +status = EOL + +[en] +name = English + +[es] +name = Spanish +sphinxopts = XELATEX_WITH_FONTSPEC + +[fr] +name = French +sphinxopts = XELATEX_WITH_FONTSPEC + +[id] +name = Indonesian +in_prod = False + +[it] +name = Italian +in_prod = False + +[ja] +name = Japanese +html_only = True # See https://github.com/python/python-docs-ja/issues/35 + +[ko] +name = Korean +sphinxopts = XELATEX_FOR_KOREAN + +[pl] +name = Polish +in_prod = False + +[pt-br] +iso639_tag = pt_BR +name = Brazilian Portuguese + +[tr] +name = Turkish + +[uk] +name = Ukrainian +in_prod = False + +[zh-cn] +iso639_tag = zh_CN +name = Simplified Chinese +sphinxopts = XELATEX_WITH_CJK + +[zh-tw] +iso639_tag = zh_TW +name = Traditional Chinese +sphinxopts = XELATEX_WITH_CJK From 850e5a388b9cd0482e8eb8e98b3c1c264f1731b9 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 09:18:59 +0200 Subject: [PATCH 03/20] Use branch info from the devguide. --- build_docs.py | 94 +++++++++++++++++++++++++++++---------------------- config.ini | 41 ---------------------- 2 files changed, 54 insertions(+), 81 deletions(-) diff --git a/build_docs.py b/build_docs.py index 28c26e9..c77c4ab 100755 --- a/build_docs.py +++ b/build_docs.py @@ -75,24 +75,29 @@ class Version: STATUSES = {"EOL", "security-fixes", "stable", "pre-release", "in development"} + # Those synonyms map branch status vocabulary found in the devguide + # with our vocabulary. 
+ SYNONYMS = { + "feature": "in development", + "bugfix": "stable", + "security": "security-fixes", + "end-of-life": "EOL", + } + def __init__( self, name, *, status, - branch=None, - tag=None, + branch_or_tag=None, ): + status = self.SYNONYMS.get(status, status) if status not in self.STATUSES: raise ValueError( - f"Version status expected to be in {', '.join(self.STATUSES)}" + f"Version status expected to be one of: {', '.join(self.STATUSES|set(self.SYNONYMS.keys()))}, got {status!r}." ) self.name = name - if branch is not None and tag is not None: - raise ValueError("Please build a version from either a branch or a tag.") - if branch is None and tag is None: - raise ValueError("Please build a version with at least a branch or a tag.") - self.branch_or_tag = branch or tag + self.branch_or_tag = branch_or_tag self.status = status def __repr__(self): @@ -190,6 +195,10 @@ def setup_indexsidebar(self, versions, dest_path): ) ) + @classmethod + def from_json(cls, name, values): + return cls(name, status=values["status"], branch_or_tag=values["branch"]) + def __eq__(self, other): return self.name == other.name @@ -206,6 +215,15 @@ class Language: sphinxopts: tuple html_only: bool = False + @staticmethod + def filter(languages, language_tags=None): + if language_tags: + languages_dict = {language.tag: language for language in languages} + return [languages_dict[tag] for tag in language_tags] + return languages + + + XELATEX_DEFAULT = ( "-D latex_engine=xelatex", @@ -572,8 +590,7 @@ def parse_args(): parser.add_argument( "-b", "--branch", - choices=dict.fromkeys(chain(*((v.branch_or_tag, v.name) for v in VERSIONS))), - metavar=Version.current_dev().name, + metavar="3.7", help="Version to build (defaults to all maintained branches).", ) parser.add_argument( @@ -609,7 +626,6 @@ def parse_args(): parser.add_argument( "--languages", nargs="*", - default="all", help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'. 
" "Use 'all' to build all of them (it's the default behavior).", metavar="fr", @@ -1037,44 +1053,42 @@ def purge_path(www_root: Path, path: Path): run(["curl", "-XPURGE", f"https://docs.python.org/{{{','.join(to_purge)}}}"]) -def parse_config(): +def parse_versions_from_devguide(): + releases = requests.get( + "https://raw.githubusercontent.com/python/devguide/main/include/release-cycle.json" + ).json() + return [Version.from_json(name, release) for name, release in releases.items()] + + +def parse_languages_from_config(): config = configparser.ConfigParser() config.read(HERE / "config.ini") versions, languages = [], [] for name, section in config.items(): - if section.get("status"): # It's a version - versions.append( - Version( - name, - status=section["status"], - branch=section.get("branch"), - tag=section.get("tag"), - ) - ) - if section.get("name"): # It's a language - languages.append( - Language( - name, - section.get("iso639_tag", name), - section["name"], - section.getboolean("in_prod", True), - sphinxopts=globals()[section.get("sphinxopts", "XELATEX_DEFAULT")], - html_only=section.get("html_only", False), - ) + if name == "DEFAULT": + continue + languages.append( + Language( + name, + section.get("iso639_tag", name), + section["name"], + section.getboolean("in_prod", True), + sphinxopts=globals()[section.get("sphinxopts", "XELATEX_DEFAULT")], + html_only=section.get("html_only", False), ) - return versions, languages + ) + return languages def build_docs(args) -> bool: """Build all docs (each languages and each versions).""" - versions, languages = parse_config() - languages_dict = {language.tag: language for language in languages} - todo = list( - product( - Version.filter(versions, args.branch), - [languages_dict[tag] for tag in args.languages], - ) - ) + versions = parse_versions_from_devguide() + languages = parse_languages_from_config() + todo = [ + (version, language) + for version in Version.filter(versions, args.branch) + for language in 
Language.filter(languages, args.languages) + ] del args.branch del args.languages all_built_successfully = True diff --git a/config.ini b/config.ini index eba1d88..92e8acb 100644 --- a/config.ini +++ b/config.ini @@ -1,44 +1,3 @@ -# EOL and security-fixes are not automatically built, no need to remove them -# from the list, this way we can still easily rebuild them manually as needed. -# -# Please keep the list in reverse-order for ease of editing. - -[3.12] -branch = origin/main -status = in development - -[3.11] -branch = origin/3.11 -status = stable - -[3.10] -branch = origin/3.10 -status = stable - -[3.9] -branch = origin/3.9 -status = security-fixes - -[3.8] -branch = origin/3.8 -status = security-fixes - -[3.7] -branch = origin/3.7 -status = security-fixes - -[3.6] -tag = 3.6 -status = EOL - -[3.5] -tag = 3.5 -status = EOL - -[2.7] -tag = 2.7 -status = EOL - [en] name = English From 6fc683cdffc0648a450144949e5b1219ea3b66aa Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 09:51:45 +0200 Subject: [PATCH 04/20] Move latex config to config file. --- build_docs.py | 94 ++++++------------------------------ config.ini | 51 -------------------- config.toml | 123 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + 4 files changed, 139 insertions(+), 130 deletions(-) delete mode 100644 config.ini create mode 100644 config.toml diff --git a/build_docs.py b/build_docs.py index c77c4ab..ce01f76 100755 --- a/build_docs.py +++ b/build_docs.py @@ -3,7 +3,10 @@ """Build the Python docs for various branches and various languages. Without any arguments builds docs for all active versions and -languages configured in the config.ini file. +languages. + +Languages are stored in `config.toml` while versions are discovered +from the devguide. -q selects "quick build", which means to build only HTML. 
@@ -20,7 +23,6 @@ """ from argparse import ArgumentParser -import configparser from contextlib import suppress from dataclasses import dataclass import filecmp @@ -47,6 +49,8 @@ import zc.lockfile import jinja2 import requests +from tomlkit import parse + HERE = Path(__file__).resolve().parent @@ -223,71 +227,6 @@ def filter(languages, language_tags=None): return languages - - -XELATEX_DEFAULT = ( - "-D latex_engine=xelatex", - "-D latex_elements.inputenc=", - "-D latex_elements.fontenc=", -) - -LUALATEX_FOR_JP = ( - "-D latex_engine=lualatex", - "-D latex_elements.inputenc=", - "-D latex_elements.fontenc=", - "-D latex_docclass.manual=ltjsbook", - "-D latex_docclass.howto=ltjsarticle", - - # supress polyglossia warnings - "-D latex_elements.polyglossia=", - "-D latex_elements.fontpkg=", - - # preamble - "-D latex_elements.preamble=" - - # Render non-Japanese letters with luatex - # https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b - r"\\usepackage[noto-otf]{luatexja-preset}" - r"\\usepackage{newunicodechar}" - r"\\newunicodechar{^^^^212a}{K}" - - # Workaround for the luatex-ja issue (Thanks to @jfbu) - # https://github.com/sphinx-doc/sphinx/issues/11179#issuecomment-1420715092 - # https://osdn.net/projects/luatex-ja/ticket/47321 - r"\\makeatletter" - r"\\titleformat{\\subsubsection}{\\normalsize\\py@HeaderFamily}" - r"{\\py@TitleColor\\thesubsubsection}{0.5em}{\\py@TitleColor}" - r"\\titleformat{\\paragraph}{\\normalsize\\py@HeaderFamily}" - r"{\\py@TitleColor\\theparagraph}{0.5em}{\\py@TitleColor}" - r"\\titleformat{\\subparagraph}{\\normalsize\\py@HeaderFamily}" - r"{\\py@TitleColor\\thesubparagraph}{0.5em}{\\py@TitleColor}" - r"\\makeatother" - - # subpress warning: (fancyhdr)Make it at least 16.4pt - r"\\setlength{\\footskip}{16.4pt}" -) - -XELATEX_WITH_FONTSPEC = ( - "-D latex_engine=xelatex", - "-D latex_elements.inputenc=", - r"-D latex_elements.fontenc=\\usepackage{fontspec}", -) - -XELATEX_FOR_KOREAN = ( - "-D latex_engine=xelatex", - "-D 
latex_elements.inputenc=", - "-D latex_elements.fontenc=", - r"-D latex_elements.preamble=\\usepackage{kotex}\\setmainhangulfont" - r"{UnBatang}\\setsanshangulfont{UnDotum}\\setmonohangulfont{UnTaza}", -) - -XELATEX_WITH_CJK = ( - "-D latex_engine=xelatex", - "-D latex_elements.inputenc=", - r"-D latex_elements.fontenc=\\usepackage{xeCJK}", -) - - def run(cmd, cwd=None) -> subprocess.CompletedProcess: """Like subprocess.run, with logging before and after the command execution.""" cmd = [str(arg) for arg in cmd] @@ -626,7 +565,8 @@ def parse_args(): parser.add_argument( "--languages", nargs="*", - help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'. " + help="Language translation, as a PEP 545 language tag like" + " 'fr' or 'pt-br'. " "Use 'all' to build all of them (it's the default behavior).", metavar="fr", ) @@ -1061,20 +1001,18 @@ def parse_versions_from_devguide(): def parse_languages_from_config(): - config = configparser.ConfigParser() - config.read(HERE / "config.ini") - versions, languages = [], [] - for name, section in config.items(): - if name == "DEFAULT": - continue + """Read config.toml to discover languages to build.""" + config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) + languages = [] + for name, section in config["languages"].items(): languages.append( Language( name, section.get("iso639_tag", name), section["name"], - section.getboolean("in_prod", True), - sphinxopts=globals()[section.get("sphinxopts", "XELATEX_DEFAULT")], - html_only=section.get("html_only", False), + section.get("in_prod", config["defaults"]["in_prod"]), + sphinxopts=section.get("sphinxopts", config["defaults"]["sphinxopts"]), + html_only=section.get("html_only", config["defaults"]["html_only"]), ) ) return languages @@ -1129,8 +1067,6 @@ def main(): lock.close() - - if __name__ == "__main__": all_built_successfully = main() sys.exit(EX_OK if all_built_successfully else EX_FAILURE) diff --git a/config.ini b/config.ini deleted 
file mode 100644 index 92e8acb..0000000 --- a/config.ini +++ /dev/null @@ -1,51 +0,0 @@ -[en] -name = English - -[es] -name = Spanish -sphinxopts = XELATEX_WITH_FONTSPEC - -[fr] -name = French -sphinxopts = XELATEX_WITH_FONTSPEC - -[id] -name = Indonesian -in_prod = False - -[it] -name = Italian -in_prod = False - -[ja] -name = Japanese -html_only = True # See https://github.com/python/python-docs-ja/issues/35 - -[ko] -name = Korean -sphinxopts = XELATEX_FOR_KOREAN - -[pl] -name = Polish -in_prod = False - -[pt-br] -iso639_tag = pt_BR -name = Brazilian Portuguese - -[tr] -name = Turkish - -[uk] -name = Ukrainian -in_prod = False - -[zh-cn] -iso639_tag = zh_CN -name = Simplified Chinese -sphinxopts = XELATEX_WITH_CJK - -[zh-tw] -iso639_tag = zh_TW -name = Traditional Chinese -sphinxopts = XELATEX_WITH_CJK diff --git a/config.toml b/config.toml new file mode 100644 index 0000000..a8132e6 --- /dev/null +++ b/config.toml @@ -0,0 +1,123 @@ +[defaults] +# name has no default, it is mandatory. +# iso639_tag defaults to section key. 
+in_prod = true +html_only = false +sphinxopts = [ + "-D latex_engine=xelatex", + "-D latex_elements.inputenc=", + "-D latex_elements.fontenc=", +] + + +[languages] + +[languages.en] +name = "English" + +[languages.es] +name = "Spanish" +sphinxopts = [ + '-D latex_engine=xelatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=\\usepackage{fontspec}', +] + +[languages.fr] +name = "French" +sphinxopts = [ + '-D latex_engine=xelatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=\\usepackage{fontspec}', +] + +[languages.id] +name = "Indonesian" +in_prod = false + +[languages.it] +name = "Italian" +in_prod = false + +[languages.ja] +name = "Japanese" +html_only = true # See https://github.com/python/python-docs-ja/issues/35 +sphinxopts = [ + '-D latex_engine=lualatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=', + '-D latex_docclass.manual=ltjsbook', + '-D latex_docclass.howto=ltjsarticle', + + # suppress polyglossia warnings + '-D latex_elements.polyglossia=', + '-D latex_elements.fontpkg=', + + # preamble + # Render non-Japanese letters with luatex + # https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b + # Workaround for the luatex-ja issue (Thanks to @jfbu) + # https://github.com/sphinx-doc/sphinx/issues/11179#issuecomment-1420715092 + # https://osdn.net/projects/luatex-ja/ticket/47321 + # suppress warning: (fancyhdr)Make it at least 16.4pt + '''-D latex_elements.preamble= + \\usepackage[noto-otf]{luatexja-preset} + \\usepackage{newunicodechar} + \\newunicodechar{^^^^212a}{K} + \\makeatletter + \\titleformat{\\subsubsection}{\\normalsize\\py@HeaderFamily} + {\\py@TitleColor\\thesubsubsection}{0.5em}{\\py@TitleColor} + \\titleformat{\\paragraph}{\\normalsize\\py@HeaderFamily} + {\\py@TitleColor\\theparagraph}{0.5em}{\\py@TitleColor} + \\titleformat{\\subparagraph}{\\normalsize\\py@HeaderFamily} + {\\py@TitleColor\\thesubparagraph}{0.5em}{\\py@TitleColor} + \\makeatother + 
\\setlength{\\footskip}{16.4pt}''' +] + +[languages.ko] +name = "Korean" +sphinxopts = [ + '-D latex_engine=xelatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=', + '''-D latex_elements.preamble= +\\usepackage{kotex} +\\setmainhangulfont{UnBatang} +\\setsanshangulfont{UnDotum} +\\setmonohangulfont{UnTaza}''', +] + +[languages.pl] +name = "Polish" +in_prod = false + +[languages.pt-br] +iso639_tag = "pt_BR" +name = "Brazilian Portuguese" + +[languages.tr] +name = "Turkish" + +[languages.uk] +name = "Ukrainian" +in_prod = false +html_only = true + +[languages.zh-cn] +iso639_tag = "zh_CN" +name = "Simplified Chinese" +sphinxopts = [ + '-D latex_engine=xelatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=\\usepackage{xeCJK}', +] + +[languages.zh-tw] +iso639_tag = "zh_TW" +name = "Traditional Chinese" +sphinxopts = [ + '-D latex_engine=xelatex', + '-D latex_elements.inputenc=', + '-D latex_elements.fontenc=\\usepackage{xeCJK}', +] diff --git a/requirements.txt b/requirements.txt index 65ae7f2..f51c7d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ jinja2 requests sentry-sdk +tomlkit zc.lockfile From de24eb7d8114c661d5defd81e28fa0b04e691b4e Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 10:17:37 +0200 Subject: [PATCH 05/20] Proofreading. 
--- README.md | 11 ----------- build_docs.py | 55 ++++++++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 36ccbf2..3e70fa4 100644 --- a/README.md +++ b/README.md @@ -54,14 +54,3 @@ of Sphinx we're using where: 3.12 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 3.13 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 ========= ===== ===== ===== ===== ===== ===== ===== ===== ======= ===== ===== ======= ======= - -## The GitHub hook server - -`build_docs_server.py` is a simple HTTP server handling GitHub Webhooks -requests to build the doc when needed. It only needs `push` events. - -Its logging can be configured by giving a yaml file path to the -`--logging-config` argument. - -By default the loglevel is `DEBUG` on `stderr`, the default config can -be found in the code so one can bootstrap a different config from it. diff --git a/build_docs.py b/build_docs.py index ce01f76..ea8bd1a 100755 --- a/build_docs.py +++ b/build_docs.py @@ -23,10 +23,9 @@ """ from argparse import ArgumentParser -from contextlib import suppress +from contextlib import suppress, contextmanager from dataclasses import dataclass import filecmp -from itertools import chain, product import json import logging import logging.handlers @@ -37,10 +36,8 @@ import shutil import subprocess import sys -import time from bisect import bisect_left as bisect from collections import OrderedDict -from contextlib import contextmanager from pathlib import Path from string import Template from textwrap import indent @@ -49,11 +46,9 @@ import zc.lockfile import jinja2 import requests -from tomlkit import parse +import tomlkit -HERE = Path(__file__).resolve().parent - try: from os import EX_OK, EX_SOFTWARE as EX_FAILURE except ImportError: @@ -66,11 +61,7 @@ else: sentry_sdk.init() -if not hasattr(shlex, "join"): - # Add shlex.join if missing (pre 3.8) - shlex.join = lambda 
split_command: " ".join( - shlex.quote(arg) for arg in split_command - ) +HERE = Path(__file__).resolve().parent @total_ordering @@ -98,7 +89,8 @@ def __init__( status = self.SYNONYMS.get(status, status) if status not in self.STATUSES: raise ValueError( - f"Version status expected to be one of: {', '.join(self.STATUSES|set(self.SYNONYMS.keys()))}, got {status!r}." + "Version status expected to be one of: " + f"{', '.join(self.STATUSES|set(self.SYNONYMS.keys()))}, got {status!r}." ) self.name = name self.branch_or_tag = branch_or_tag @@ -167,7 +159,7 @@ def filter(versions, branch=None): @staticmethod def current_stable(versions): """Find the current stable cPython version.""" - return max([v for v in versions if v.status == "stable"], key=Version.as_tuple) + return max((v for v in versions if v.status == "stable"), key=Version.as_tuple) @staticmethod def current_dev(versions): @@ -201,6 +193,7 @@ def setup_indexsidebar(self, versions, dest_path): @classmethod def from_json(cls, name, values): + """Loads a version from devguide's json representation.""" return cls(name, status=values["status"], branch_or_tag=values["branch"]) def __eq__(self, other): @@ -221,6 +214,7 @@ class Language: @staticmethod def filter(languages, language_tags=None): + """Filter a sequence of languages according to --languages.""" if language_tags: languages_dict = {language.tag: language for language in languages} return [languages_dict[tag] for tag in language_tags] @@ -447,7 +441,7 @@ def build_robots_txt( robots_file.chmod(0o775) run(["chgrp", group, robots_file]) if not skip_cache_invalidation: - requests.request("PURGE", "https://docs.python.org/robots.txt") + requests.request("PURGE", "https://docs.python.org/robots.txt", timeout=30) def build_sitemap( @@ -703,13 +697,19 @@ def build(self): if self.language.tag == "ja": # Since luatex doesn't support \ufffd, replace \ufffd with '?'. 
# https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b - # Luatex already fixed this issue, so we can remove this once Texlive is updated. + # Luatex already fixed this issue, so we can remove this once Texlive + # is updated. # (https://github.com/TeX-Live/luatex/commit/eaa95ce0a141eaf7a02) - subprocess.check_output("sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{locale_dirs}/ja/LC_MESSAGES/**/*.po", - shell=True) - subprocess.check_output("sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{self.checkout}/Doc/**/*.rst", shell=True) + subprocess.check_output( + "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " + f"{locale_dirs}/ja/LC_MESSAGES/**/*.po", + shell=True, + ) + subprocess.check_output( + "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " + f"{self.checkout}/Doc/**/*.rst", + shell=True, + ) if self.version.status == "EOL": sphinxopts.append("-D html_context.outdated=1") @@ -983,7 +983,7 @@ def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None: if not skip_cache_invalidation: url = str(file).replace("/srv/", "https://") logging.info("Purging %s from CDN", url) - requests.request("PURGE", url) + requests.request("PURGE", url, timeout=30) def purge_path(www_root: Path, path: Path): @@ -995,7 +995,9 @@ def purge_path(www_root: Path, path: Path): def parse_versions_from_devguide(): releases = requests.get( - "https://raw.githubusercontent.com/python/devguide/main/include/release-cycle.json" + "https://raw.githubusercontent.com/" + "python/devguide/main/include/release-cycle.json", + timeout=30, ).json() return [Version.from_json(name, release) for name, release in releases.items()] @@ -1059,14 +1061,13 @@ def main(): lock = zc.lockfile.LockFile(HERE / "build_docs.lock") except zc.lockfile.LockError: logging.info("Another builder is running... 
dying...") - return False + return EX_FAILURE try: - build_docs(args) + return EX_OK if build_docs(args) else EX_FAILURE finally: lock.close() if __name__ == "__main__": - all_built_successfully = main() - sys.exit(EX_OK if all_built_successfully else EX_FAILURE) + sys.exit(main()) From d3e86f667d3d032559789198b394092a746701cc Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 11:42:39 +0200 Subject: [PATCH 06/20] Clone indiferently branches and tags. --- build_docs.py | 70 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/build_docs.py b/build_docs.py index ea8bd1a..8f7d2e2 100755 --- a/build_docs.py +++ b/build_docs.py @@ -268,26 +268,34 @@ def traverse(dircmp_result): return changed -def git_clone(repository: str, directory: Path, branch_or_tag=None): - """Clone or update the given repository in the given directory. - Optionally checking out a branch. - """ - logging.info("Updating repository %s in %s", repository, directory) - try: - if not (directory / ".git").is_dir(): - raise AssertionError("Not a git repository.") - run(["git", "-C", directory, "fetch"]) - if branch_or_tag: - run(["git", "-C", directory, "reset", "--hard", branch_or_tag, "--"]) - run(["git", "-C", directory, "clean", "-dfqx"]) - except (subprocess.CalledProcessError, AssertionError): - if directory.exists(): - shutil.rmtree(directory) - logging.info("Cloning %s into %s", repository, directory) - directory.mkdir(mode=0o775, parents=True, exist_ok=True) - run(["git", "clone", repository, directory]) - if branch_or_tag: - run(["git", "-C", directory, "reset", "--hard", branch_or_tag, "--"]) +@dataclass +class Git: + """Git command abstraction for our specific needs.""" + + repository: str + directory: Path + + def run(self, *args): + """Run git command in the clone repository.""" + return run(("git", "-C", self.directory) + args) + + def get_ref(self, pattern): + """Return the reference of a given tag or 
branch.""" + return self.run("show-ref", "-s", pattern).stdout.strip() + + def switch(self, branch_or_tag): + """Reset and cleans the repository to the given branch or tag.""" + self.run("fetch") + self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") + self.run("clean", "-dfqx") + + def clone(self): + """Maybe clone the repository, if not already cloned.""" + if (self.directory / ".git").is_dir(): + return # Already cloned + logging.info("Cloning %s into %s", self.repository, self.directory) + self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) + self.run("clone", self.repository, self.directory) def version_to_tuple(version): @@ -342,7 +350,7 @@ def translation_branch(locale_repo, locale_clone_dir, needed_version: str): It could be enhanced to return tags, if needed, just return the tag as a string (without the `origin/` branch prefix). """ - git_clone(locale_repo, locale_clone_dir) + Git(locale_repo, locale_clone_dir).clone() remote_branches = run(["git", "-C", locale_clone_dir, "branch", "-r"]).stdout branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) return "origin/" + locate_nearest_version(branches, needed_version) @@ -655,6 +663,10 @@ def checkout(self) -> Path: return self.build_root / "cpython" def clone_translation(self): + """Clone the translation repository from github. + + See PEP 545 for repository naming convention. 
+ """ locale_repo = f"https://github.com/python/python-docs-{self.language.tag}.git" locale_clone_dir = ( self.build_root @@ -663,18 +675,16 @@ def clone_translation(self): / self.language.iso639_tag / "LC_MESSAGES" ) - git_clone( - locale_repo, - locale_clone_dir, - translation_branch(locale_repo, locale_clone_dir, self.version.name), + repo = Git(locale_repo, locale_clone_dir) + repo.clone() + repo.switch( + translation_branch(locale_repo, locale_clone_dir, self.version.name) ) def clone_cpython(self): - git_clone( - "https://github.com/python/cpython.git", - self.checkout, - self.version.branch_or_tag, - ) + repo = Git("https://github.com/python/cpython.git", self.checkout) + repo.clone() + repo.switch(self.version.branch_or_tag) def build(self): """Build this version/language doc.""" From aeb773d873259601e54b6f33aea28143f4f4ecec Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 22:13:43 +0200 Subject: [PATCH 07/20] Those arguments can't contain newlines. --- config.toml | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/config.toml b/config.toml index a8132e6..1253447 100644 --- a/config.toml +++ b/config.toml @@ -60,19 +60,7 @@ sphinxopts = [ # https://github.com/sphinx-doc/sphinx/issues/11179#issuecomment-1420715092 # https://osdn.net/projects/luatex-ja/ticket/47321 # subpress warning: (fancyhdr)Make it at least 16.4pt - '''-D latex_elements.preamble= - \\usepackage[noto-otf]{luatexja-preset} - \\usepackage{newunicodechar} - \\newunicodechar{^^^^212a}{K} - \\makeatletter - \\titleformat{\\subsubsection}{\\normalsize\\py@HeaderFamily} - {\\py@TitleColor\\thesubsubsection}{0.5em}{\\py@TitleColor} - \\titleformat{\\paragraph}{\\normalsize\\py@HeaderFamily} - {\\py@TitleColor\\theparagraph}{0.5em}{\\py@TitleColor} - \\titleformat{\\subparagraph}{\\normalsize\\py@HeaderFamily} - {\\py@TitleColor\\thesubparagraph}{0.5em}{\\py@TitleColor} - \\makeatother - \\setlength{\\footskip}{16.4pt}''' + '-D 
latex_elements.preamble=\\usepackage[noto-otf]{luatexja-preset}\\usepackage{newunicodechar}\\newunicodechar{^^^^212a}{K}\\makeatletter\\titleformat{\\subsubsection}{\\normalsize\\py@HeaderFamily}{\\py@TitleColor\\thesubsubsection}{0.5em}{\\py@TitleColor}\\titleformat{\\paragraph}{\\normalsize\\py@HeaderFamily}{\\py@TitleColor\\theparagraph}{0.5em}{\\py@TitleColor}\\titleformat{\\subparagraph}{\\normalsize\\py@HeaderFamily}{\\py@TitleColor\\thesubparagraph}{0.5em}{\\py@TitleColor}\\makeatother\\setlength{\\footskip}{16.4pt}' ] [languages.ko] @@ -81,11 +69,7 @@ sphinxopts = [ '-D latex_engine=xelatex', '-D latex_elements.inputenc=', '-D latex_elements.fontenc=', - '''-D latex_elements.preamble= -\\usepackage{kotex} -\\setmainhangulfont{UnBatang} -\\setsanshangulfont{UnDotum} -\\setmonohangulfont{UnTaza}''', + '-D latex_elements.preamble=\\usepackage{kotex}\\setmainhangulfont{UnBatang}\\setsanshangulfont{UnDotum}\\setmonohangulfont{UnTaza}', ] [languages.pl] From 7cc0aa4d4ab46472e33cb2553467991774b0e532 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 22:47:28 +0200 Subject: [PATCH 08/20] Has been removed in d5a0a72b373c81f64bbabcbe8b885ba2a409be38. --- config.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/config.toml b/config.toml index 1253447..23b613e 100644 --- a/config.toml +++ b/config.toml @@ -41,7 +41,6 @@ in_prod = false [languages.ja] name = "Japanese" -html_only = true # See https://github.com/python/python-docs-ja/issues/35 sphinxopts = [ '-D latex_engine=lualatex', '-D latex_elements.inputenc=', From 2b2af6dddc6220b6f3c1f612aac7a2d519b06b11 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 11:29:45 +0200 Subject: [PATCH 09/20] Can't clone with to an unexisting directory. 
--- build_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_docs.py b/build_docs.py index 8f7d2e2..03e8f5a 100755 --- a/build_docs.py +++ b/build_docs.py @@ -295,7 +295,7 @@ def clone(self): return # Already cloned logging.info("Cloning %s into %s", self.repository, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) - self.run("clone", self.repository, self.directory) + run(["git", "clone", self.repository, self.directory]) def version_to_tuple(version): From 6c8d0b484eaa99409edc81c49fc4276bb8f55981 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 13:42:19 +0200 Subject: [PATCH 10/20] Fix link. --- build_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_docs.py b/build_docs.py index 03e8f5a..f16a47c 100755 --- a/build_docs.py +++ b/build_docs.py @@ -709,7 +709,7 @@ def build(self): # https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b # Luatex already fixed this issue, so we can remove this once Texlive # is updated. - # (https://github.com/TeX-Live/luatex/commit/eaa95ce0a141eaf7a02) + # (https://github.com/TeX-Live/luatex/commit/af5faf1) subprocess.check_output( "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " f"{locale_dirs}/ja/LC_MESSAGES/**/*.po", From 8389b9e647cc91095ec169b060ba4fba6b3b1882 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 13:46:59 +0200 Subject: [PATCH 11/20] More uses of the Git class, thanks to AA-Turner review. 
--- build_docs.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/build_docs.py b/build_docs.py index f16a47c..e2b76b9 100755 --- a/build_docs.py +++ b/build_docs.py @@ -340,7 +340,7 @@ def locate_nearest_version(available_versions, target_version): return tuple_to_version(found) -def translation_branch(locale_repo, locale_clone_dir, needed_version: str): +def translation_branch(repo: Git, needed_version: str): """Some cpython versions may be untranslated, being either too old or too new. @@ -350,8 +350,7 @@ def translation_branch(locale_repo, locale_clone_dir, needed_version: str): It could be enhanced to return tags, if needed, just return the tag as a string (without the `origin/` branch prefix). """ - Git(locale_repo, locale_clone_dir).clone() - remote_branches = run(["git", "-C", locale_clone_dir, "branch", "-r"]).stdout + remote_branches = repo.run("branch", "-r").stdout branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) return "origin/" + locate_nearest_version(branches, needed_version) @@ -677,9 +676,7 @@ def clone_translation(self): ) repo = Git(locale_repo, locale_clone_dir) repo.clone() - repo.switch( - translation_branch(locale_repo, locale_clone_dir, self.version.name) - ) + repo.switch(translation_branch(repo, self.version.name)) def clone_cpython(self): repo = Git("https://github.com/python/cpython.git", self.checkout) From 88becd0862fd1f4d3a82d226ba0b8d85a1865d4a Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 14:08:14 +0200 Subject: [PATCH 12/20] Purge helper. 
--- build_docs.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/build_docs.py b/build_docs.py index e2b76b9..628c65c 100755 --- a/build_docs.py +++ b/build_docs.py @@ -42,6 +42,7 @@ from string import Template from textwrap import indent from typing import Iterable +from urllib.parse import urljoin import zc.lockfile import jinja2 @@ -448,7 +449,7 @@ def build_robots_txt( robots_file.chmod(0o775) run(["chgrp", group, robots_file]) if not skip_cache_invalidation: - requests.request("PURGE", "https://docs.python.org/robots.txt", timeout=30) + purge("robots.txt") def build_sitemap( @@ -904,13 +905,9 @@ def copy_build_to_webroot(self): prefixes = run(["find", "-L", targets_dir, "-samefile", target]).stdout prefixes = prefixes.replace(targets_dir + "/", "") prefixes = [prefix + "/" for prefix in prefixes.split("\n") if prefix] - to_purge = prefixes[:] + purge(*prefixes) for prefix in prefixes: - to_purge.extend(prefix + p for p in changed) - logging.info("Running CDN purge") - run( - ["curl", "-XPURGE", f"https://docs.python.org/{{{','.join(to_purge)}}}"] - ) + purge(*[prefix + p for p in changed]) logging.info( "Publishing done for version: %s, language: %s", self.version.name, @@ -966,6 +963,28 @@ def dev_symlink(www_root: Path, group, versions, languages): symlink(www_root, language, current_dev, "dev", group) +def purge(*paths): + """Remove one or many paths from docs.python.org's CDN. + + To be used when a file changes, so the CDN fetches the new one. + """ + base = "https://docs.python.org/" + for path in paths: + url = urljoin(base, str(path)) + logging.info("Purging %s from CDN", url) + requests.request("PURGE", url, timeout=30) + + +def purge_path(www_root: Path, path: Path): + """Recursively remove a path from docs.python.org's CDN. + + To be used when a directory changes, so the CDN fetches the new one. 
+ """ + purge(*[file.relative_to(www_root) for file in path.glob("**/*")]) + purge(path.relative_to(www_root)) + purge(str(path.relative_to(www_root)) + "/") + + def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None: """In www_root we check that all canonical links point to existing contents. @@ -988,16 +1007,7 @@ def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None: html = html.replace(canonical.group(0), "") file.write_text(html, encoding="UTF-8", errors="surrogateescape") if not skip_cache_invalidation: - url = str(file).replace("/srv/", "https://") - logging.info("Purging %s from CDN", url) - requests.request("PURGE", url, timeout=30) - - -def purge_path(www_root: Path, path: Path): - to_purge = [str(file.relative_to(www_root)) for file in path.glob("**/*")] - to_purge.append(str(path.relative_to(www_root))) - to_purge.append(str(path.relative_to(www_root)) + "/") - run(["curl", "-XPURGE", f"https://docs.python.org/{{{','.join(to_purge)}}}"]) + purge(str(file).replace("/srv/docs.python.org/", "")) def parse_versions_from_devguide(): From b81466f0bf9ceb2616372aed2509da0f6b7c4186 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 14:11:04 +0200 Subject: [PATCH 13/20] Avoid some lookups. 
--- build_docs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/build_docs.py b/build_docs.py index 628c65c..497854f 100755 --- a/build_docs.py +++ b/build_docs.py @@ -1023,15 +1023,16 @@ def parse_languages_from_config(): """Read config.toml to discover languages to build.""" config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) languages = [] + defaults = config["defaults"] for name, section in config["languages"].items(): languages.append( Language( name, section.get("iso639_tag", name), section["name"], - section.get("in_prod", config["defaults"]["in_prod"]), - sphinxopts=section.get("sphinxopts", config["defaults"]["sphinxopts"]), - html_only=section.get("html_only", config["defaults"]["html_only"]), + section.get("in_prod", defaults["in_prod"]), + sphinxopts=section.get("sphinxopts", defaults["sphinxopts"]), + html_only=section.get("html_only", defaults["html_only"]), ) ) return languages From 2a9e24fe2e7d47259d0e31999c89ddf7072729b3 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 14:12:51 +0200 Subject: [PATCH 14/20] Not useful. --- config.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/config.toml b/config.toml index 23b613e..7c0deee 100644 --- a/config.toml +++ b/config.toml @@ -9,9 +9,6 @@ sphinxopts = [ "-D latex_elements.fontenc=", ] - -[languages] - [languages.en] name = "English" From d515559bc46add92b91ecf1d65caa116c9abddb1 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 14:38:05 +0200 Subject: [PATCH 15/20] tag can be deduced from iso-tag. 
--- build_docs.py | 10 ++++++---- config.toml | 10 +++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/build_docs.py b/build_docs.py index 497854f..65c2272 100755 --- a/build_docs.py +++ b/build_docs.py @@ -206,13 +206,16 @@ def __gt__(self, other): @dataclass(frozen=True, order=True) class Language: - tag: str iso639_tag: str name: str in_prod: bool sphinxopts: tuple html_only: bool = False + @property + def tag(self): + return self.iso639_tag.replace("_", "-").lower() + @staticmethod def filter(languages, language_tags=None): """Filter a sequence of languages according to --languages.""" @@ -1024,11 +1027,10 @@ def parse_languages_from_config(): config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) languages = [] defaults = config["defaults"] - for name, section in config["languages"].items(): + for iso639_tag, section in config["languages"].items(): languages.append( Language( - name, - section.get("iso639_tag", name), + iso639_tag, section["name"], section.get("in_prod", defaults["in_prod"]), sphinxopts=section.get("sphinxopts", defaults["sphinxopts"]), diff --git a/config.toml b/config.toml index 7c0deee..c3f4886 100644 --- a/config.toml +++ b/config.toml @@ -1,6 +1,5 @@ [defaults] # name has no default, it is mandatory. -# iso639_tag defaults to section key. 
in_prod = true html_only = false sphinxopts = [ @@ -72,8 +71,7 @@ sphinxopts = [ name = "Polish" in_prod = false -[languages.pt-br] -iso639_tag = "pt_BR" +[languages.pt_BR] name = "Brazilian Portuguese" [languages.tr] @@ -84,8 +82,7 @@ name = "Ukrainian" in_prod = false html_only = true -[languages.zh-cn] -iso639_tag = "zh_CN" +[languages.zh_CN] name = "Simplified Chinese" sphinxopts = [ '-D latex_engine=xelatex', @@ -93,8 +90,7 @@ sphinxopts = [ '-D latex_elements.fontenc=\\usepackage{xeCJK}', ] -[languages.zh-tw] -iso639_tag = "zh_TW" +[languages.zh_TW] name = "Traditional Chinese" sphinxopts = [ '-D latex_engine=xelatex', From d83b8302b9d8c8c9d6b13c3d1e7f46ff7f33353c Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 14:55:51 +0200 Subject: [PATCH 16/20] Maybe better names for repository and remote. --- build_docs.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/build_docs.py b/build_docs.py index 65c2272..158c005 100755 --- a/build_docs.py +++ b/build_docs.py @@ -273,10 +273,10 @@ def traverse(dircmp_result): @dataclass -class Git: - """Git command abstraction for our specific needs.""" +class Repository: + """Git repository abstraction for our specific needs.""" - repository: str + remote: str directory: Path def run(self, *args): @@ -297,9 +297,9 @@ def clone(self): """Maybe clone the repository, if not already cloned.""" if (self.directory / ".git").is_dir(): return # Already cloned - logging.info("Cloning %s into %s", self.repository, self.directory) + logging.info("Cloning %s into %s", self.remote, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) - run(["git", "clone", self.repository, self.directory]) + run(["git", "clone", self.remote, self.directory]) def version_to_tuple(version): @@ -344,7 +344,7 @@ def locate_nearest_version(available_versions, target_version): return tuple_to_version(found) -def translation_branch(repo: Git, needed_version: str): +def 
translation_branch(repo: Repository, needed_version: str): """Some cpython versions may be untranslated, being either too old or too new. @@ -678,12 +678,12 @@ def clone_translation(self): / self.language.iso639_tag / "LC_MESSAGES" ) - repo = Git(locale_repo, locale_clone_dir) + repo = Repository(locale_repo, locale_clone_dir) repo.clone() repo.switch(translation_branch(repo, self.version.name)) def clone_cpython(self): - repo = Git("https://github.com/python/cpython.git", self.checkout) + repo = Repository("https://github.com/python/cpython.git", self.checkout) repo.clone() repo.switch(self.version.branch_or_tag) From 6ed6b925623112ee9da40bb5dac924f90dee2dae Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sat, 14 Oct 2023 15:03:40 +0200 Subject: [PATCH 17/20] Run a single git fetch on cpython. --- build_docs.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/build_docs.py b/build_docs.py index 158c005..80cb6a3 100755 --- a/build_docs.py +++ b/build_docs.py @@ -287,19 +287,25 @@ def get_ref(self, pattern): """Return the reference of a given tag or branch.""" return self.run("show-ref", "-s", pattern).stdout.strip() + def fetch(self): + self.run("fetch") + def switch(self, branch_or_tag): """Reset and cleans the repository to the given branch or tag.""" - self.run("fetch") self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") self.run("clean", "-dfqx") def clone(self): """Maybe clone the repository, if not already cloned.""" if (self.directory / ".git").is_dir(): - return # Already cloned + return False # Already cloned logging.info("Cloning %s into %s", self.remote, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) run(["git", "clone", self.remote, self.directory]) + return True + + def update(self): + self.clone() or self.fetch() def version_to_tuple(version): @@ -620,6 +626,7 @@ class DocBuilder: versions: Iterable[Version] language: Language languages: Iterable[Language] + 
cpython_repo: Repository build_root: Path www_root: Path quick: bool @@ -643,7 +650,7 @@ def full_build(self): def run(self) -> bool: """Build and publish a Python doc, for a language, and a version.""" try: - self.clone_cpython() + self.cpython_repo.switch(self.version.branch_or_tag) if self.language.tag != "en": self.clone_translation() self.build_venv() @@ -679,14 +686,9 @@ def clone_translation(self): / "LC_MESSAGES" ) repo = Repository(locale_repo, locale_clone_dir) - repo.clone() + repo.update() repo.switch(translation_branch(repo, self.version.name)) - def clone_cpython(self): - repo = Repository("https://github.com/python/cpython.git", self.checkout) - repo.clone() - repo.switch(self.version.branch_or_tag) - def build(self): """Build this version/language doc.""" logging.info( @@ -1052,13 +1054,19 @@ def build_docs(args) -> bool: del args.branch del args.languages all_built_successfully = True + cpython_repo = Repository( + "https://github.com/python/cpython.git", args.build_root / "cpython" + ) + cpython_repo.update() while todo: version, language = todo.pop() if sentry_sdk: with sentry_sdk.configure_scope() as scope: scope.set_tag("version", version.name) scope.set_tag("language", language.tag) - builder = DocBuilder(version, versions, language, languages, **vars(args)) + builder = DocBuilder( + version, versions, language, languages, cpython_repo, **vars(args) + ) all_built_successfully &= builder.run() build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) From 98fec40a7b2481218e27f67090d8340a5acbe46e Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Sun, 15 Oct 2023 15:22:08 +0200 Subject: [PATCH 18/20] It's 2023! 
--- build_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_docs.py b/build_docs.py index 80cb6a3..ce2d0fb 100755 --- a/build_docs.py +++ b/build_docs.py @@ -540,7 +540,7 @@ def parse_args(): parser.add_argument( "-b", "--branch", - metavar="3.7", + metavar="3.12", help="Version to build (defaults to all maintained branches).", ) parser.add_argument( From ca3404720c187c07ee6e9cd025f03d62d80a2479 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Mon, 16 Oct 2023 21:32:11 +0200 Subject: [PATCH 19/20] For the main branch, show-ref also finds the heads/main. --- build_docs.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/build_docs.py b/build_docs.py index ce2d0fb..c824bf5 100755 --- a/build_docs.py +++ b/build_docs.py @@ -285,7 +285,12 @@ def run(self, *args): def get_ref(self, pattern): """Return the reference of a given tag or branch.""" - return self.run("show-ref", "-s", pattern).stdout.strip() + try: + # Maybe it's a branch + return self.run("show-ref", "-s", "origin/" + pattern).stdout.strip() + except subprocess.CalledProcessError: + # Maybe it's a tag + return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip() def fetch(self): self.run("fetch") @@ -357,12 +362,11 @@ def translation_branch(repo: Repository, needed_version: str): This function looks for remote branches on the given repo, and returns the name of the nearest existing branch. - It could be enhanced to return tags, if needed, just return the - tag as a string (without the `origin/` branch prefix). + It could be enhanced to also search for tags. 
""" remote_branches = repo.run("branch", "-r").stdout branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) - return "origin/" + locate_nearest_version(branches, needed_version) + return locate_nearest_version(branches, needed_version) @contextmanager From 5a4c9538a97179f082cff524aee8f622b83ade00 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Tue, 17 Oct 2023 22:16:06 +0200 Subject: [PATCH 20/20] --language semantic has not been changed. --- build_docs.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/build_docs.py b/build_docs.py index c824bf5..efccaca 100755 --- a/build_docs.py +++ b/build_docs.py @@ -11,9 +11,8 @@ -q selects "quick build", which means to build only HTML. Translations are fetched from github repositories according to PEP -545. --languages allow select translations, use "--languages" to -build all translations (default) or "--languages en" to skip all -translations (as en is the untranslated version).. +545. `--languages` allows to select translations, like `--languages +en` to just build the english documents. This script was originally created and by Georg Brandl in March 2010. @@ -582,7 +581,7 @@ def parse_args(): nargs="*", help="Language translation, as a PEP 545 language tag like" " 'fr' or 'pt-br'. " - "Use 'all' to build all of them (it's the default behavior).", + "Builds all available languages by default.", metavar="fr", ) parser.add_argument(