diff --git a/.circleci/config.yml b/.circleci/config.yml index e00cad37c..e4e2a89f3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,7 +44,7 @@ jobs: "git+https://github.com/pyvista/pyvista" \ "docutils>=0.18" imageio pydata-sphinx-theme \ "jupyterlite-sphinx>=0.8.0,<0.9.0" "jupyterlite-pyodide-kernel<0.1.0" \ - libarchive-c "sphinxcontrib-video>=0.2.1rc0" + libarchive-c "sphinxcontrib-video>=0.2.1rc0" intersphinx_registry pip uninstall -yq vtk # pyvista installs vtk above pip install --upgrade --only-binary ":all" --extra-index-url https://wheels.vtk.org vtk-osmesa - save_cache: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 777e4502f..54fc9d2a5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -32,10 +32,6 @@ jobs: sphinx_version: dev distrib: pip locale: C - - os: ubuntu-latest # oldest supported Python and Sphinx - python: '3.8' - sphinx_version: '4' - distrib: mamba - os: ubuntu-latest python: '3.11' sphinx_version: '5' @@ -79,7 +75,7 @@ jobs: python=${{ env.PYTHON_VERSION }} pip numpy setuptools matplotlib pillow pytest pytest-cov coverage seaborn statsmodels plotly joblib wheel libiconv pygraphviz memory_profiler ipython pypandoc lxml conda-libmamba-solver mamba - ffmpeg + ffmpeg intersphinx-registry if: matrix.distrib == 'mamba' # Make sure that things work even if the locale is set to C (which # effectively means ASCII). Some of the input rst files have unicode diff --git a/CHANGES.rst b/CHANGES.rst index e55734322..4fa0a8592 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ Changelog ========= +v0.17.0 +------- + +Support for Python 3.8 and Sphinx 4 dropped in this release. +Requirement is now Python >= 3.9 and Sphinx >= 5. + v0.16.0 ------- Sphinx 7.3.0 and above changed caching and serialization checks. Now instead of passing diff --git a/doc/conf.py b/doc/conf.py index 8b8901be5..a93869541 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,6 +17,8 @@ import warnings from datetime import date +from intersphinx_registry import get_intersphinx_mapping + import sphinx_gallery # If extensions (or modules to document with autodoc) are in another directory, @@ -332,15 +334,18 @@ def setup(app): # Example configuration for intersphinx: refer to the Python standard library. 
-intersphinx_mapping = {
-    "python": (f"https://docs.python.org/{sys.version_info.major}", None),
-    "numpy": ("https://numpy.org/doc/stable/", None),
-    "matplotlib": ("https://matplotlib.org/stable", None),
-    "pyvista": ("https://docs.pyvista.org/version/stable", None),
-    "sklearn": ("https://scikit-learn.org/stable", None),
-    "sphinx": ("https://www.sphinx-doc.org/en/master", None),
-    "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
-}
+intersphinx_mapping = get_intersphinx_mapping(
+    packages={
+        "joblib",
+        "matplotlib",
+        "numpy",
+        "pandas",
+        "python",
+        "pyvista",
+        "sklearn",
+        "sphinx",
+    },
+)

 examples_dirs = ["../examples", "../tutorials"]
 gallery_dirs = ["auto_examples", "tutorials"]
@@ -352,30 +357,19 @@ def setup(app):
     # installed
     import pyvista
 except Exception:  # can raise all sorts of errors
-    pass
+    pyvista = None
 else:
     image_scrapers += ("pyvista",)
     examples_dirs.append("../pyvista_examples")
     gallery_dirs.append("auto_pyvista_examples")
-    pyvista.OFF_SCREEN = True
-    # Preferred plotting style for documentation
-    pyvista.set_plot_theme("document")
-    pyvista.global_theme.window_size = [1024, 768]
-    pyvista.global_theme.font.size = 22
-    pyvista.global_theme.font.label_size = 22
-    pyvista.global_theme.font.title_size = 22
-    pyvista.global_theme.return_cpos = False
-    # necessary when building the sphinx gallery
-    pyvista.BUILDING_GALLERY = True
-    pyvista.set_jupyter_backend(None)

 # Set plotly renderer to capture _repr_html_ for sphinx-gallery
 try:
+    import plotly
     import plotly.io
 except ImportError:
-    pass
+    plotly = None
 else:
-    plotly.io.renderers.default = "sphinx_gallery"
     examples_dirs.append("../plotly_examples")
     gallery_dirs.append("auto_plotly_examples")
@@ -393,6 +387,7 @@ def setup(app):
     "examples_dirs": examples_dirs,
     "gallery_dirs": gallery_dirs,
     "image_scrapers": image_scrapers,
+    "reset_modules": ("matplotlib", "seaborn", "sg_doc_build.reset_others"),
     "compress_images": ("images", "thumbnails"),
     # specify the order of examples to be according to filename
     "within_subsection_order": "FileNameSortKey",
diff --git a/doc/configuration.rst b/doc/configuration.rst
index d5fb0e106..6fe3608d8 100644
--- a/doc/configuration.rst
+++ b/doc/configuration.rst
@@ -44,6 +44,7 @@ file, inside a ``sphinx_gallery_conf`` dictionary.
 - ``abort_on_example_error`` (:ref:`abort_on_first`)
 - ``expected_failing_examples`` (:ref:`dont_fail_exit`)
 - ``only_warn_on_example_error`` (:ref:`warning_on_error`)
+- ``parallel`` (:ref:`parallel`)

 **Cross-referencing**

@@ -2093,6 +2094,35 @@ flag is passed to ``sphinx-build``. This can be enabled by setting::

     }

+.. _parallel:
+
+Build examples in parallel
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sphinx-Gallery can be configured to run examples simultaneously using
+:mod:`joblib`. This can be enabled by setting::
+
+    sphinx_gallery_conf = {
+        ...
+        'parallel': 2,
+    }
+
+If an ``int``, that number of jobs is passed to :class:`joblib.Parallel`.
+If ``True``, the number of jobs given to Sphinx via its ``-j`` flag is used.
+
+.. warning::
+    Some packages might not play nicely with parallel processing, so this feature
+    is considered **experimental**!
+
+    For example, you might need to set variables or call functions in a
+    :ref:`custom resetter ` to ensure that all spawned processes are properly
+    set up and torn down; see the sketch below. Parallelism is achieved through
+    the Loky backend of joblib; see :ref:`joblib:parallel` for documentation of
+    many relevant considerations (e.g., pickling, oversubscription of CPU
+    resources, etc.).
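For illustration only (not part of the patch): because each example may execute in a
separate Loky worker, per-process state set once at the top of ``conf.py`` is not
inherited by the workers, which is why this patch moves the pyvista setup into
``sg_doc_build.reset_others``. A minimal sketch of the same pattern, where
``my_resetters`` and ``reset_pyvista`` are hypothetical names and the module must be
importable at build time (e.g. its directory added to ``sys.path`` in ``conf.py``)::

    # doc/sphinxext/my_resetters.py -- hypothetical local extension module
    def reset_pyvista(gallery_conf, fname):
        """Re-apply per-process plotting state in each spawned worker."""
        import pyvista

        pyvista.OFF_SCREEN = True  # render off-screen in every worker process
        pyvista.BUILDING_GALLERY = True  # necessary when building the gallery


    # doc/conf.py
    sphinx_gallery_conf = {
        "examples_dirs": ["../examples"],
        "gallery_dirs": ["auto_examples"],
        "parallel": 2,  # run two examples at a time via joblib's Loky backend
        "reset_modules": ("matplotlib", "seaborn", "my_resetters.reset_pyvista"),
    }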
+ + Using parallel building will also disable memory measurements. + .. _recommend_examples: Enabling the example recommender system diff --git a/doc/getting_started.rst b/doc/getting_started.rst index 8e4296319..c6d43282d 100644 --- a/doc/getting_started.rst +++ b/doc/getting_started.rst @@ -178,7 +178,7 @@ generated: * ``.py`` - to enable the user to download a ``.py`` version of the example. * ``.py.md5`` - a md5 hash of the ``.py`` file, used to determine if changes have been made to the file and thus if new output files need to be generated. -* ``_codeobj.pickle`` - used to identify function names and to which module +* ``.codeobj.json`` - used to identify function names and to which module they belong (more details in :ref:`sphx_glr_auto_examples_plot_6_function_identifier.py`) diff --git a/doc/sphinxext/sg_doc_build.py b/doc/sphinxext/sg_doc_build.py index 3f9f08630..3bfb93594 100644 --- a/doc/sphinxext/sg_doc_build.py +++ b/doc/sphinxext/sg_doc_build.py @@ -56,3 +56,29 @@ def notebook_modification_function(notebook_content, notebook_filename): notebook_content["cells"] = ( dummy_notebook_content["cells"] + notebook_content["cells"] ) + + +def reset_others(gallery_conf, fname): + """Reset plotting functions.""" + try: + import pyvista + except Exception: + pass + else: + pyvista.OFF_SCREEN = True + # Preferred plotting style for documentation + pyvista.set_plot_theme("document") + pyvista.global_theme.window_size = [1024, 768] + pyvista.global_theme.font.size = 22 + pyvista.global_theme.font.label_size = 22 + pyvista.global_theme.font.title_size = 22 + pyvista.global_theme.return_cpos = False + # necessary when building the sphinx gallery + pyvista.BUILDING_GALLERY = True + pyvista.set_jupyter_backend(None) + try: + import plotly.io + except Exception: + pass + else: + plotly.io.renderers.default = "sphinx_gallery" diff --git a/pyproject.toml b/pyproject.toml index 727f70158..e8b2ae5fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dynamic = [ ] dependencies = [ "pillow", - "sphinx>=4", + "sphinx>=5", ] optional-dependencies.animations = [ "sphinxcontrib-video", @@ -39,6 +39,7 @@ optional-dependencies.animations = [ optional-dependencies.dev = [ "absl-py", "graphviz", + "intersphinx-registry", "ipython", "joblib", "jupyterlite-sphinx", @@ -57,6 +58,9 @@ optional-dependencies.dev = [ optional-dependencies.jupyterlite = [ "jupyterlite-sphinx", ] +optional-dependencies.parallel = [ + "joblib", +] optional-dependencies.recommender = [ "numpy", ] diff --git a/sphinx_gallery/backreferences.py b/sphinx_gallery/backreferences.py index 2a574b4e0..e955ec616 100644 --- a/sphinx_gallery/backreferences.py +++ b/sphinx_gallery/backreferences.py @@ -6,8 +6,6 @@ """ import ast -import codecs -import collections import inspect import os import re @@ -19,7 +17,7 @@ from sphinx.errors import ExtensionError from .scrapers import _find_image_ext -from .utils import _replace_md5 +from .utils import _W_KW, _replace_md5 THUMBNAIL_PARENT_DIV = """ .. raw:: html @@ -249,9 +247,9 @@ def identify_names(script_blocks, ref_regex, global_variables=None, node=""): Returns ------- - example_code_obj : OrderedDict[str, Any] - OrderedDict with information about all code object references found in an - example. OrderedDict contains the following keys: + example_code_obj : Dict[str, Any] + Dict with information about all code object references found in an + example. 
Dict contains the following keys: - example_code_obj['name'] : function or class name (str) - example_code_obj['module'] : module name (str) @@ -271,7 +269,7 @@ def identify_names(script_blocks, ref_regex, global_variables=None, node=""): # Get matches from docstring inspection (explicit matches) text = "\n".join(block.content for block in script_blocks if block.type == "text") names.extend((x, x, False, False, True) for x in re.findall(ref_regex, text)) - example_code_obj = collections.OrderedDict() # order is important + example_code_obj = dict() # native dict preserves order nowadays # Make a list of all guesses, in `_embed_code_links` we will break # when we find a match for name, full_name, class_like, is_class, is_explicit in names: @@ -292,13 +290,13 @@ def identify_names(script_blocks, ref_regex, global_variables=None, node=""): # get shortened module name module_short = _get_short_module_name(module, attribute) - cobj = { - "name": attribute, - "module": module, - "module_short": module_short or module, - "is_class": is_class, - "is_explicit": is_explicit, - } + cobj = dict( + name=attribute, + module=module, + module_short=module_short or module, + is_class=is_class, + is_explicit=is_explicit, + ) example_code_obj[name].append(cobj) return example_code_obj @@ -391,9 +389,8 @@ def _write_backreferences( f"{backref}.examples.new", ) seen = backref in seen_backrefs - with codecs.open( - include_path, "a" if seen else "w", encoding="utf-8" - ) as ex_file: + mode = "a" if seen else "w" + with open(include_path, mode, **_W_KW) as ex_file: if not seen: # Be aware that if the number of lines of this heading changes, # the minigallery directive should be modified accordingly @@ -432,7 +429,7 @@ def _finalize_backreferences(seen_backrefs, gallery_conf): if os.path.isfile(path): # Close div containing all thumbnails # (it was open in _write_backreferences) - with codecs.open(path, "a", encoding="utf-8") as ex_file: + with open(path, "a", **_W_KW) as ex_file: ex_file.write(THUMBNAIL_PARENT_DIV_CLOSE) _replace_md5(path, mode="t") else: diff --git a/sphinx_gallery/docs_resolv.py b/sphinx_gallery/docs_resolv.py index 194e6e3a8..6f0c5c0fa 100644 --- a/sphinx_gallery/docs_resolv.py +++ b/sphinx_gallery/docs_resolv.py @@ -2,10 +2,9 @@ # License: 3-clause BSD """Link resolver objects.""" -import codecs import gzip +import json import os -import pickle import posixpath import re import shelve @@ -19,7 +18,7 @@ from sphinx.errors import ExtensionError from sphinx.search import js_index -from .utils import status_iterator +from .utils import _W_KW, _replace_md5, status_iterator logger = sphinx.util.logging.getLogger("sphinx-gallery") @@ -39,7 +38,7 @@ def _get_data(url): raise ExtensionError(f"unknown encoding {encoding!r}") data = data.decode("utf-8") else: - with codecs.open(url, mode="r", encoding="utf-8") as fid: + with open(url, mode="r", encoding="utf-8") as fid: data = fid.read() return data @@ -243,8 +242,8 @@ def resolve(self, cobj, this_url, return_type=False): Parameters ---------- - cobj : OrderedDict[str, Any] - OrderedDict with information about the "code object" for which we are + cobj : Dict[str, Any] + Dict with information about the "code object" for which we are resolving a link. 
- cobj['name'] : function or class name (str) @@ -253,7 +252,6 @@ def resolve(self, cobj, this_url, return_type=False): - cobj['is_class'] : whether object is class (bool) - cobj['is_explicit'] : whether object is an explicit backreference (referred to by sphinx markup) (bool) - this_url: str URL of the current page. Needed to construct relative URLs (only used if relative=True in constructor). @@ -334,6 +332,22 @@ def _get_intersphinx_inventory(app): return intersphinx_inv +# Whatever mechanism is used for writing here should be paired with reading in +# _embed_code_links +def _write_code_obj(target_file, example_code_obj): + codeobj_fname = target_file.with_name(target_file.stem + ".codeobj.json.new") + with open(codeobj_fname, "w", **_W_KW) as fid: + json.dump( + example_code_obj, + fid, + sort_keys=True, + ensure_ascii=False, + indent=1, + check_circular=False, + ) + _replace_md5(codeobj_fname, check="json") + + def _embed_code_links(app, gallery_conf, gallery_dir): """Add resolvers for the packages for which we want to show links.""" doc_resolvers = {} @@ -368,6 +382,7 @@ def _embed_code_links(app, gallery_conf, gallery_dir): [dirpath, filename] for dirpath, _, filenames in os.walk(html_gallery_dir) for filename in filenames + if filename.endswith(".html") ] iterator = status_iterator( flat, @@ -380,15 +395,15 @@ def _embed_code_links(app, gallery_conf, gallery_dir): for dirpath, fname in iterator: full_fname = os.path.join(html_gallery_dir, dirpath, fname) subpath = dirpath[len(html_gallery_dir) + 1 :] - pickle_fname = os.path.join( - src_gallery_dir, subpath, fname[:-5] + "_codeobj.pickle" + json_fname = os.path.join( + src_gallery_dir, subpath, fname[:-5] + ".codeobj.json" ) - if not os.path.exists(pickle_fname): + if not os.path.exists(json_fname): continue - # we have a pickle file with the objects to embed links for - with open(pickle_fname, "rb") as fid: - example_code_obj = pickle.load(fid) + # we have a json file with the objects to embed links for + with open(json_fname, "r", encoding="utf-8") as fid: + example_code_obj = json.load(fid) # generate replacement strings with the links str_repl = {} for name in sorted(example_code_obj): @@ -472,9 +487,9 @@ def substitute_link(match): return str_repl[match.group()] if len(str_repl) > 0: - with codecs.open(full_fname, "r", "utf-8") as fid: + with open(full_fname, "r", encoding="utf-8") as fid: lines_in = fid.readlines() - with codecs.open(full_fname, "w", "utf-8") as fid: + with open(full_fname, "w", **_W_KW) as fid: for line in lines_in: line_out = regex.sub(substitute_link, line) fid.write(line_out) diff --git a/sphinx_gallery/gen_gallery.py b/sphinx_gallery/gen_gallery.py index 37733516f..fcdffc048 100644 --- a/sphinx_gallery/gen_gallery.py +++ b/sphinx_gallery/gen_gallery.py @@ -138,6 +138,7 @@ def __call__(self, gallery_conf, script_vars): "api_usage_ignore": ".*__.*__", "show_api_usage": False, # if this changes, change write_api_entries, too "copyfile_regex": "", + "parallel": False, } logger = sphinx.util.logging.getLogger("sphinx-gallery") @@ -400,8 +401,20 @@ def _fill_gallery_conf_defaults(sphinx_gallery_conf, app=None, check_keys=True): # Check ignore_repr_types _check_config_type(gallery_conf, "ignore_repr_types", str) + # Check parallel + _check_config_type(gallery_conf, "parallel", (bool, int)) + if gallery_conf["parallel"] is True: + gallery_conf["parallel"] = app.parallel + if gallery_conf["parallel"] == 1: + gallery_conf["parallel"] = False + if gallery_conf["parallel"]: + try: + import joblib # noqa + except 
Exception: + raise ValueError("joblib must be importable when parallel mode is enabled") + # deal with show_memory - _get_call_memory_and_base(gallery_conf) + _get_call_memory_and_base(gallery_conf, update=True) # check callables for key in ( @@ -578,8 +591,7 @@ def _prepare_sphx_glr_dirs(gallery_conf, srcdir): if bool(gallery_conf["backreferences_dir"]): backreferences_dir = os.path.join(srcdir, gallery_conf["backreferences_dir"]) - if not os.path.exists(backreferences_dir): - os.makedirs(backreferences_dir) + os.makedirs(backreferences_dir, exist_ok=True) return list(zip(examples_dirs, gallery_dirs)) @@ -714,8 +726,11 @@ def generate_gallery_rst(app): each sub-section, with each header followed by a toctree linking to every example in the root gallery/sub-section. """ - logger.info("generating gallery...", color="white") gallery_conf = app.config.sphinx_gallery_conf + extra = "" + if gallery_conf["parallel"]: + extra = f" (with parallel={gallery_conf['parallel']})" + logger.info(f"generating gallery{extra}...", color="white") seen_backrefs = set() @@ -1380,8 +1395,7 @@ def write_junit_xml(gallery_conf, target_dir, costs): # Actually write it fname = os.path.normpath(os.path.join(target_dir, gallery_conf["junit"])) junit_dir = os.path.dirname(fname) - if not os.path.isdir(junit_dir): - os.makedirs(junit_dir) + os.makedirs(junit_dir, exist_ok=True) with codecs.open(fname, "w", encoding="utf-8") as fid: fid.write(output) @@ -1415,17 +1429,17 @@ def _expected_failing_examples(gallery_conf): def _parse_failures(gallery_conf): """Split the failures.""" - failing_examples = set(gallery_conf["failing_examples"].keys()) + failing_examples = set(gallery_conf["failing_examples"]) expected_failing_examples = _expected_failing_examples(gallery_conf) failing_as_expected = failing_examples.intersection(expected_failing_examples) failing_unexpectedly = failing_examples.difference(expected_failing_examples) passing_unexpectedly = expected_failing_examples.difference(failing_examples) # filter from examples actually run - passing_unexpectedly = [ + passing_unexpectedly = set( src_file for src_file in passing_unexpectedly if re.search(gallery_conf["filename_pattern"], src_file) - ] + ) return failing_as_expected, failing_unexpectedly, passing_unexpectedly @@ -1453,7 +1467,9 @@ def summarize_failing_examples(app, exception): idt = " " if failing_as_expected: - logger.info(bold("Examples failing as expected:"), color="blue") + logger.info( + bold(blue(f"Examples failing as expected ({len(failing_as_expected)}):")) + ) for fail_example in failing_as_expected: path = os.path.relpath(fail_example, gallery_conf["src_dir"]) logger.info( @@ -1463,7 +1479,9 @@ def summarize_failing_examples(app, exception): fail_msgs = [] if failing_unexpectedly: - fail_msgs.append(bold(red("Unexpected failing examples:\n"))) + fail_msgs.append( + bold(red(f"Unexpected failing examples ({len(failing_unexpectedly)}):\n")) + ) for fail_example in failing_unexpectedly: path = os.path.relpath(fail_example, gallery_conf["src_dir"]) fail_msgs.append( @@ -1476,7 +1494,7 @@ def summarize_failing_examples(app, exception): os.path.relpath(p, gallery_conf["src_dir"]) for p in passing_unexpectedly ] fail_msgs.append( - bold(red("Examples expected to fail, but not failing:\n\n")) + bold(red(f"Examples expected to fail, but not failing ({len(paths)}):\n\n")) + red("\n".join(indent(p, idt) for p in paths)) + "\n\nPlease remove these examples from " + "sphinx_gallery_conf['expected_failing_examples'] " diff --git a/sphinx_gallery/gen_rst.py 
b/sphinx_gallery/gen_rst.py index 7cd95234e..3d1d559a4 100644 --- a/sphinx_gallery/gen_rst.py +++ b/sphinx_gallery/gen_rst.py @@ -9,7 +9,6 @@ """ import ast -import codecs import codeop import contextlib import copy @@ -17,10 +16,8 @@ import importlib import inspect import os -import pickle import re import stat -import subprocess import sys import traceback import warnings @@ -45,7 +42,12 @@ identify_names, ) from .block_parser import BlockParser -from .interactive_example import gen_binder_rst, gen_jupyterlite_rst +from .docs_resolv import _write_code_obj +from .interactive_example import ( + _add_jupyterlite_badge_logo, + gen_binder_rst, + gen_jupyterlite_rst, +) from .notebook import jupyter_notebook, save_notebook from .scrapers import ( ImagePathIterator, @@ -56,6 +58,7 @@ save_figures, ) from .utils import ( + _W_KW, _collect_gallery_files, _format_toctree, _replace_md5, @@ -338,16 +341,16 @@ def extract_intro_and_title(filename, docstring): def md5sum_is_current(src_file, mode="b"): """Checks whether src_file has the same md5 hash as the one on disk.""" - src_md5 = get_md5sum(src_file, mode) + src_md5 = get_md5sum(src_file, mode=mode) src_md5_file = str(src_file) + ".md5" - if os.path.exists(src_md5_file): - with open(src_md5_file) as file_checksum: - ref_md5 = file_checksum.read() + if not os.path.exists(src_md5_file): + return False - return src_md5 == ref_md5 + with open(src_md5_file) as file_cs: + ref_md5 = file_cs.read() - return False + return src_md5 == ref_md5 def save_thumbnail(image_path_template, src_file, script_vars, file_conf, gallery_conf): @@ -368,8 +371,7 @@ def save_thumbnail(image_path_template, src_file, script_vars, file_conf, galler Sphinx-Gallery configuration dictionary """ thumb_dir = os.path.join(os.path.dirname(image_path_template), "thumb") - if not os.path.exists(thumb_dir): - os.makedirs(thumb_dir) + os.makedirs(thumb_dir, exist_ok=True) # read specification of the figure to display as thumbnail from main text thumbnail_number = file_conf.get("thumbnail_number", None) @@ -396,7 +398,7 @@ def save_thumbnail(image_path_template, src_file, script_vars, file_conf, galler base_image_name = os.path.splitext(os.path.basename(src_file))[0] thumb_file = os.path.join(thumb_dir, f"sphx_glr_{base_image_name}_thumb.{ext}") - if src_file in gallery_conf["failing_examples"]: + if "formatted_exception" in script_vars: img = os.path.join(glr_path_static(), "broken_example.png") elif os.path.exists(thumbnail_image_path): img = thumbnail_image_path @@ -470,7 +472,7 @@ def _write_subsection_index( if gallery_conf["nested_sections"] and not user_index_rst and is_subsection: index_path = os.path.join(target_dir, "index.rst.new") head_ref = os.path.relpath(target_dir, gallery_conf["src_dir"]) - with codecs.open(index_path, "w", encoding="utf-8") as (findex): + with open(index_path, "w", **_W_KW) as findex: findex.write( "\n\n.. 
_sphx_glr_{}:\n\n".format(head_ref.replace(os.sep, "_")) ) @@ -541,15 +543,21 @@ def generate_dir_rst( user_index_rst = True if header_fname: user_index_rst = False - with codecs.open(header_fname, "r", encoding="utf-8") as fid: + with open(header_fname, "r", encoding="utf-8") as fid: header_content = fid.read() index_content += header_content # Add empty lines to avoid bug in issue #165 index_content += "\n\n" - if not os.path.exists(target_dir): - os.makedirs(target_dir) + # Make all dirs ahead of time to avoid collisions in parallel processing + os.makedirs(target_dir, exist_ok=True) + image_dir = os.path.join(target_dir, "images") + os.makedirs(image_dir, exist_ok=True) + thumb_dir = os.path.join(image_dir, "thumb") + os.makedirs(thumb_dir, exist_ok=True) + if gallery_conf["jupyterlite"] is not None: + _add_jupyterlite_badge_logo(image_dir) # Get example filenames from `src_dir` listdir = _collect_gallery_files([src_dir], gallery_conf) @@ -570,11 +578,38 @@ def generate_dir_rst( f"generating gallery for {build_target_dir}... ", length=len(sorted_listdir), ) - for fname in iterator: - intro, title, (t, mem) = generate_file_rst( - fname, target_dir, src_dir, gallery_conf, seen_backrefs + + parallel = list + p_fun = generate_file_rst + if gallery_conf["parallel"]: + from joblib import Parallel, delayed + + p_fun = delayed(generate_file_rst) + parallel = Parallel( + n_jobs=gallery_conf["parallel"], + pre_dispatch="n_jobs", + batch_size=1, + backend="loky", ) + + results = parallel( + p_fun(fname, target_dir, src_dir, gallery_conf) for fname in iterator + ) + for fi, (intro, title, (t, mem), out_vars) in enumerate(results): + fname = sorted_listdir[fi] src_file = os.path.normpath(os.path.join(src_dir, fname)) + gallery_conf["titles"][src_file] = title + # n.b. 
non-executable files have none of these three variables defined, + # so the last conditional must be "elif" not just "else" + if "formatted_exception" in out_vars: + assert "passing" not in out_vars + assert "stale" not in out_vars + gallery_conf["failing_examples"][src_file] = out_vars["formatted_exception"] + elif "passing" in out_vars: + assert "stale" not in out_vars + gallery_conf["passing_examples"].append(src_file) + elif "stale" in out_vars: + gallery_conf["stale_examples"].append(out_vars["stale"]) costs.append(dict(t=t, mem=mem, src_file=src_file, target_dir=target_dir)) gallery_item_filename = ( (Path(build_target_dir) / fname).with_suffix("").as_posix() @@ -585,6 +620,18 @@ def generate_dir_rst( index_content += this_entry toctree_filenames.append("/" + gallery_item_filename) + # Write backreferences + if "backrefs" in out_vars: + _write_backreferences( + out_vars["backrefs"], + seen_backrefs, + gallery_conf, + target_dir, + fname, + intro, + title, + ) + # Close thumbnail parent div index_content += THUMBNAIL_PARENT_DIV_CLOSE @@ -678,7 +725,7 @@ def handle_exception(exc_info, src_file, script_vars, gallery_conf): if gallery_conf["abort_on_example_error"]: raise # Stores failing file - gallery_conf["failing_examples"][src_file] = formatted_exception + script_vars["formatted_exception"] = formatted_exception script_vars["execute_script"] = False # Ensure it's marked as our style @@ -737,23 +784,10 @@ def __call__(self): def _get_memory_base(): - """Get the base amount of memory used by running a Python process.""" - # There might be a cleaner way to do this at some point + """Get the base amount of memory used by the current Python process.""" from memory_profiler import memory_usage - if sys.platform in ("win32", "darwin"): - sleep, timeout = (1, 2) - else: - sleep, timeout = (0.5, 1) - proc = subprocess.Popen( - [sys.executable, "-c", f"import time, sys; time.sleep({sleep}); sys.exit(0)"], - close_fds=True, - ) - memories = memory_usage(proc, interval=1e-3, timeout=timeout) - proc.communicate(timeout=timeout) - # On OSX sometimes the last entry can be None - memories = [mem for mem in memories if mem is not None] + [0.0] - memory_base = max(memories) + memory_base = memory_usage(max_usage=True) return memory_base @@ -1140,9 +1174,9 @@ def execute_script(script_blocks, script_vars, gallery_conf, file_conf): script_vars["memory_delta"] -= memory_start # Write md5 checksum if the example was meant to run (no-plot # shall not cache md5sum) and has built correctly - with open(script_vars["target_file"] + ".md5", "w") as file_checksum: - file_checksum.write(get_md5sum(script_vars["target_file"], "t")) - gallery_conf["passing_examples"].append(script_vars["src_file"]) + with open(script_vars["target_file"] + ".md5", "w") as file_cs: + file_cs.write(get_md5sum(script_vars["target_file"], mode="t")) + script_vars["passing"] = True return output_blocks, time_elapsed @@ -1199,10 +1233,7 @@ def _get_backreferences(gallery_conf, script_vars, script_blocks, node, target_f ref_regex = _make_ref_regex(gallery_conf["default_role"]) example_code_obj = identify_names(script_blocks, ref_regex, global_variables, node) if example_code_obj: - codeobj_fname = target_file.with_name(target_file.stem + "_codeobj.pickle.new") - with open(codeobj_fname, "wb") as fid: - pickle.dump(example_code_obj, fid, pickle.HIGHEST_PROTOCOL) - _replace_md5(codeobj_fname) + _write_code_obj(target_file, example_code_obj) exclude_regex = gallery_conf["exclude_implicit_doc_regex"] backrefs = { 
"{module_short}.{name}".format(**cobj) @@ -1224,7 +1255,7 @@ def _get_backreferences(gallery_conf, script_vars, script_blocks, node, target_f return backrefs -def generate_file_rst(fname, target_dir, src_dir, gallery_conf, seen_backrefs=None): +def generate_file_rst(fname, target_dir, src_dir, gallery_conf): """Generate the rst file for a given example. Parameters @@ -1237,21 +1268,32 @@ def generate_file_rst(fname, target_dir, src_dir, gallery_conf, seen_backrefs=No Absolute path to directory where source examples are stored. gallery_conf : dict Contains the configuration of Sphinx-Gallery. - seen_backrefs : set - The seen backreferences. Returns ------- intro: str The introduction of the example. + title : str + The example title. cost : tuple A tuple containing the ``(time_elapsed, memory_used)`` required to run the script. + out_vars : dict + Variables used to run the script, possibly with entries: + + "stale" + True if the example was stale. + "backrefs" + The backreferences. + "passing" + True if the example passed. + "formatted_exception" + Formatted string of the exception. """ - seen_backrefs = set() if seen_backrefs is None else seen_backrefs src_file = os.path.normpath(os.path.join(src_dir, fname)) + out_vars = dict() target_file = Path(target_dir) / fname - _replace_md5(src_file, target_file, "copy", mode="t") + _replace_md5(src_file, target_file, method="copy", mode="t") parser, language = _get_parser(fname, gallery_conf) @@ -1260,23 +1302,22 @@ def generate_file_rst(fname, target_dir, src_dir, gallery_conf, seen_backrefs=No ) intro, title = extract_intro_and_title(fname, script_blocks[0].content) - gallery_conf["titles"][src_file] = title executable = executable_script(src_file, gallery_conf) if md5sum_is_current(target_file, mode="t"): do_return = True + logger.debug(f"md5sum is current: {target_file}") if executable: if gallery_conf["run_stale_examples"]: do_return = False else: - gallery_conf["stale_examples"].append(str(target_file)) + out_vars["stale"] = str(target_file) if do_return: - return intro, title, (0, 0) + return intro, title, (0, 0), out_vars image_dir = os.path.join(target_dir, "images") - if not os.path.exists(image_dir): - os.makedirs(image_dir) + os.makedirs(image_dir, exist_ok=True) base_image_name = os.path.splitext(fname)[0] image_fname = "sphx_glr_" + base_image_name + "_{0:03}.png" @@ -1340,26 +1381,23 @@ def generate_file_rst(fname, target_dir, src_dir, gallery_conf, seen_backrefs=No # Produce the zip file of all sources zip_files(files_to_zip, target_file.with_suffix(".zip"), target_dir) - # Write names - backrefs = _get_backreferences( + # Get names + out_vars["backrefs"] = _get_backreferences( gallery_conf, script_vars, script_blocks, node, target_file, ) - - # Write backreferences - _write_backreferences( - backrefs, seen_backrefs, gallery_conf, target_dir, fname, intro, title - ) - - # Don't keep this during reset + for key in ("passing", "formatted_exception"): + if key in script_vars: + out_vars[key] = script_vars[key] + # don't keep this during reset del script_vars if executable and gallery_conf["reset_modules_order"] in ["after", "both"]: clean_modules(gallery_conf, fname, "after") - return intro, title, (time_elapsed, memory_used) + return intro, title, (time_elapsed, memory_used), out_vars EXAMPLE_HEADER = """ @@ -1546,7 +1584,7 @@ def save_rst_example( example_rst += SPHX_GLR_SIG write_file_new = example_file.with_suffix(".rst.new") - with codecs.open(write_file_new, "w", encoding="utf-8") as f: + with open(write_file_new, "w", **_W_KW) 
as f: f.write(example_rst) # make it read-only so that people don't try to edit it mode = os.stat(write_file_new).st_mode @@ -1651,21 +1689,28 @@ def _sg_call_memory_noop(func): return 0.0, func() -def _get_call_memory_and_base(gallery_conf): +def _get_call_memory_and_base(gallery_conf, *, update=False): show_memory = gallery_conf["show_memory"] # Default to no-op version call_memory = _sg_call_memory_noop memory_base = 0.0 - if show_memory: + if show_memory and gallery_conf["plot_gallery"]: if callable(show_memory): call_memory = show_memory - elif gallery_conf["plot_gallery"]: # True-like + elif gallery_conf["parallel"]: + if update: + logger.warning( + f"{gallery_conf['show_memory']=} disabled due to " + f"{gallery_conf['parallel']=}." + ) + gallery_conf["show_memory"] = False + else: out = _get_memprof_call_memory() if out is not None: call_memory, memory_base = out - else: + elif update: gallery_conf["show_memory"] = False assert callable(call_memory) diff --git a/sphinx_gallery/interactive_example.py b/sphinx_gallery/interactive_example.py index 99ac8abf3..775e3a53c 100644 --- a/sphinx_gallery/interactive_example.py +++ b/sphinx_gallery/interactive_example.py @@ -167,8 +167,7 @@ def _copy_binder_reqs(app, binder_conf): ) binder_folder = os.path.join(app.outdir, "binder") - if not os.path.isdir(binder_folder): - os.makedirs(binder_folder) + os.makedirs(binder_folder, exist_ok=True) # Copy over the requirements to the output directory for path in path_reqs: @@ -206,7 +205,7 @@ def _copy_binder_notebooks(app): binder_conf = gallery_conf["binder"] notebooks_dir = os.path.join(app.outdir, binder_conf["notebooks_dir"]) shutil.rmtree(notebooks_dir, ignore_errors=True) - os.makedirs(notebooks_dir) + os.makedirs(notebooks_dir, exist_ok=True) if not isinstance(gallery_dirs, (list, tuple)): gallery_dirs = [gallery_dirs] @@ -424,14 +423,8 @@ def gen_jupyterlite_rst(fpath, gallery_conf): # Similar work-around for badge file as in # gen_binder_rst - physical_path = os.path.join( - os.path.dirname(fpath), "images", "jupyterlite_badge_logo.svg" - ) - os.makedirs(os.path.dirname(physical_path), exist_ok=True) - if not os.path.isfile(physical_path): - shutil.copyfile( - os.path.join(glr_path_static(), "jupyterlite_badge_logo.svg"), physical_path - ) + image_dir = os.path.join(os.path.dirname(fpath), "images") + _add_jupyterlite_badge_logo(image_dir) rst = ( "\n" " .. 
container:: lite-badge\n\n" @@ -443,6 +436,15 @@ def gen_jupyterlite_rst(fpath, gallery_conf): return rst +def _add_jupyterlite_badge_logo(image_dir): + os.makedirs(image_dir, exist_ok=True) + physical_path = os.path.join(image_dir, "jupyterlite_badge_logo.svg") + if not os.path.isfile(physical_path): + shutil.copyfile( + os.path.join(glr_path_static(), "jupyterlite_badge_logo.svg"), physical_path + ) + + def check_jupyterlite_conf(jupyterlite_conf, app): """Return full JupyterLite configuration with defaults.""" # app=None can happen for testing diff --git a/sphinx_gallery/py_source_parser.py b/sphinx_gallery/py_source_parser.py index 8ae04787b..e52461c54 100644 --- a/sphinx_gallery/py_source_parser.py +++ b/sphinx_gallery/py_source_parser.py @@ -4,7 +4,6 @@ # Author: Óscar Nájera import ast -import codecs import re import tokenize from collections import namedtuple @@ -57,10 +56,9 @@ def parse_source_file(filename): node : AST node content : utf-8 encoded string """ - with codecs.open(filename, "r", "utf-8") as fid: + # builtin open automatically converts \r\n to \n + with open(filename, "r", encoding="utf-8") as fid: content = fid.read() - # change from Windows format to UNIX for uniformity - content = content.replace("\r\n", "\n") try: node = ast.parse(content) diff --git a/sphinx_gallery/tests/test_full.py b/sphinx_gallery/tests/test_full.py index 76062ef98..e97304f1b 100644 --- a/sphinx_gallery/tests/test_full.py +++ b/sphinx_gallery/tests/test_full.py @@ -14,6 +14,7 @@ from io import StringIO from pathlib import Path +import lxml.etree import lxml.html import pytest from packaging.version import Version @@ -61,6 +62,7 @@ manim = pytest.importorskip("matplotlib.animation") if not manim.writers.is_available("ffmpeg"): pytest.skip("ffmpeg is not available", allow_module_level=True) +pytest.importorskip("joblib") @pytest.fixture(scope="module") @@ -77,8 +79,8 @@ def _sphinx_app(tmpdir_factory, buildername): # Skip if numpy not installed pytest.importorskip("numpy") - temp_dir = (tmpdir_factory.getbasetemp() / f"root_{buildername}").strpath - src_dir = op.join(op.dirname(__file__), "tinybuild") + temp_dir = tmpdir_factory.getbasetemp() / f"root_{buildername}" + src_dir = Path(__file__).parent / "tinybuild" def ignore(src, names): return ("_build", "gen_modules", "auto_examples") @@ -86,9 +88,9 @@ def ignore(src, names): shutil.copytree(src_dir, temp_dir, ignore=ignore) # For testing iteration, you can get similar behavior just doing `make` # inside the tinybuild/doc directory - conf_dir = op.join(temp_dir, "doc") - out_dir = op.join(conf_dir, "_build", buildername) - toctrees_dir = op.join(conf_dir, "_build", "toctrees") + conf_dir = temp_dir / "doc" + out_dir = conf_dir / "_build" / buildername + toctrees_dir = conf_dir / "_build" / "toctrees" # Avoid warnings about re-registration, see: # https://github.com/sphinx-doc/sphinx/issues/5038 with docutils_namespace(): @@ -180,33 +182,47 @@ def test_optipng(sphinx_app): assert "optipng version" not in status.lower() # catch the --version -def test_junit(sphinx_app, tmpdir): +def test_junit(sphinx_app, tmp_path): + """Test junit output.""" out_dir = sphinx_app.outdir - junit_file = op.join(out_dir, "sphinx-gallery", "junit-results.xml") - assert op.isfile(junit_file) - with codecs.open(junit_file, "r", "utf-8") as fid: + junit_file = Path(out_dir) / "sphinx-gallery" / "junit-results.xml" + assert junit_file.is_file() + with open(junit_file, "rb") as fid: contents = fid.read() - assert contents.startswith("= Version("4.1") for orig, new in 
zip(list_orig, list_new): check_name = op.splitext(op.basename(orig))[0] - if check_name.endswith("_codeobj"): + if check_name.endswith(".codeobj"): check_name = check_name[:-8] if check_name in different: if good_sphinx: @@ -619,7 +638,7 @@ def _assert_mtimes(list_orig, list_new, different=(), ignore=()): op.getmtime(new), atol=1e-3, rtol=1e-20, - err_msg=op.basename(orig), + err_msg=f"{op.basename(orig)} was updated but should not have been", ) @@ -655,10 +674,10 @@ def test_rebuild(tmpdir_factory, sphinx_app): for f in os.listdir(op.join(old_src_dir, "auto_examples")) if f.endswith(".rst") ) - generated_pickle_0 = sorted( + generated_json_0 = sorted( op.join(old_src_dir, "auto_examples", f) for f in os.listdir(op.join(old_src_dir, "auto_examples")) - if f.endswith(".pickle") + if f.endswith(".json") ) copied_py_0 = sorted( op.join(old_src_dir, "auto_examples", f) @@ -673,7 +692,7 @@ def test_rebuild(tmpdir_factory, sphinx_app): assert len(generated_modules_0) > 0 assert len(generated_backrefs_0) > 0 assert len(generated_rst_0) > 0 - assert len(generated_pickle_0) > 0 + assert len(generated_json_0) > 0 assert len(copied_py_0) > 0 assert len(copied_ipy_0) > 0 assert len(sphinx_app.config.sphinx_gallery_conf["stale_examples"]) == 0 @@ -747,10 +766,10 @@ def test_rebuild(tmpdir_factory, sphinx_app): for f in os.listdir(op.join(new_app.srcdir, "auto_examples")) if f.endswith(".rst") ) - generated_pickle_1 = sorted( + generated_json_1 = sorted( op.join(new_app.srcdir, "auto_examples", f) for f in os.listdir(op.join(new_app.srcdir, "auto_examples")) - if f.endswith(".pickle") + if f.endswith(".json") ) copied_py_1 = sorted( op.join(new_app.srcdir, "auto_examples", f) @@ -781,8 +800,8 @@ def test_rebuild(tmpdir_factory, sphinx_app): ) _assert_mtimes(generated_rst_0, generated_rst_1, ignore=ignore) - # mtimes for pickles - _assert_mtimes(generated_pickle_0, generated_pickle_1) + # mtimes for jsons + _assert_mtimes(generated_json_0, generated_json_1) # mtimes for .py files (gh-395) _assert_mtimes(copied_py_0, copied_py_1) @@ -806,7 +825,7 @@ def test_rebuild(tmpdir_factory, sphinx_app): generated_modules_0, generated_backrefs_0, generated_rst_0, - generated_pickle_0, + generated_json_0, copied_py_0, copied_ipy_0, ) @@ -821,7 +840,7 @@ def _rerun( generated_modules_0, generated_backrefs_0, generated_rst_0, - generated_pickle_0, + generated_json_0, copied_py_0, copied_ipy_0, ): @@ -916,10 +935,10 @@ def _rerun( for f in os.listdir(op.join(new_app.srcdir, "auto_examples")) if f.endswith(".rst") ) - generated_pickle_1 = sorted( + generated_json_1 = sorted( op.join(new_app.srcdir, "auto_examples", f) for f in os.listdir(op.join(new_app.srcdir, "auto_examples")) - if f.endswith(".pickle") + if f.endswith(".json") ) copied_py_1 = sorted( op.join(new_app.srcdir, "auto_examples", f) @@ -955,9 +974,9 @@ def _rerun( if not bad: _assert_mtimes(generated_rst_0, generated_rst_1, different, ignore) - # mtimes for pickles + # mtimes for jsons use_different = () if how == "run_stale" else different - _assert_mtimes(generated_pickle_0, generated_pickle_1, ignore=ignore) + _assert_mtimes(generated_json_0, generated_json_1, ignore=ignore) # mtimes for .py files (gh-395) _assert_mtimes(copied_py_0, copied_py_1, different=use_different) @@ -985,7 +1004,7 @@ def test_error_messages(sphinx_app, name, want): """Test that informative error messages are added.""" src_dir = Path(sphinx_app.srcdir) rst = (src_dir / "auto_examples" / (name + ".rst")).read_text("utf-8") - assert re.match(want, rst, re.DOTALL) is not None + 
assert re.match(want, rst, re.DOTALL) is not None, f"{name} should have had: {want}" @pytest.mark.parametrize( diff --git a/sphinx_gallery/tests/test_gen_gallery.py b/sphinx_gallery/tests/test_gen_gallery.py index cf3c156b7..08765f27a 100644 --- a/sphinx_gallery/tests/test_gen_gallery.py +++ b/sphinx_gallery/tests/test_gen_gallery.py @@ -545,7 +545,7 @@ def test_examples_not_expected_to_pass(sphinx_app_wrapper): def test_show_memory_callable(sphinx_app_wrapper): sphinx_app = sphinx_app_wrapper.build_sphinx_app() status = sphinx_app._status.getvalue() - assert "0.0 MB" in status + assert "0.0 MB" in status, status @pytest.mark.parametrize( diff --git a/sphinx_gallery/tests/test_gen_rst.py b/sphinx_gallery/tests/test_gen_rst.py index 92ca2d2f8..be9932999 100644 --- a/sphinx_gallery/tests/test_gen_rst.py +++ b/sphinx_gallery/tests/test_gen_rst.py @@ -376,26 +376,20 @@ def test_extract_intro_and_title(): ["t", "ea8a570e9f3afc0a7c3f2a17a48b8047"], ), ) -def test_md5sums(mode, expected_md5): +def test_md5sums(mode, expected_md5, tmp_path): """Test md5sum check functions work on know file content.""" file_content = b"Local test\r\n" - with tempfile.NamedTemporaryFile("wb", delete=False) as f: - f.write(file_content) - try: - file_md5 = sg.get_md5sum(f.name, mode) - # verify correct md5sum - assert file_md5 == expected_md5 - # False because is a new file - assert not sg.md5sum_is_current(f.name) - # Write md5sum to file to check is current - with open(f.name + ".md5", "w") as file_checksum: - file_checksum.write(file_md5) - try: - assert sg.md5sum_is_current(f.name, mode) - finally: - os.remove(f.name + ".md5") - finally: - os.remove(f.name) + fname = tmp_path / "test" + fname.write_bytes(file_content) + file_md5 = sg.get_md5sum(fname, mode) + # verify correct md5sum + assert file_md5 == expected_md5, mode + # False because is a new file + assert not sg.md5sum_is_current(fname), mode + # Write md5sum to file to check is current + with open(str(fname) + ".md5", "w") as file_checksum: + file_checksum.write(file_md5) + assert sg.md5sum_is_current(fname, mode), mode @pytest.mark.parametrize( @@ -477,9 +471,18 @@ def _generate_rst(gallery_conf, fname, content): os.path.join(gallery_conf["examples_dir"], fname), mode="w", encoding="utf-8" ) as f: f.write("\n".join(content)) + with codecs.open( + os.path.join(gallery_conf["examples_dir"], "README.txt"), "w", "utf8" + ): + pass + # generate rst file - sg.generate_file_rst( - fname, gallery_conf["gallery_dir"], gallery_conf["examples_dir"], gallery_conf + generate_dir_rst( + gallery_conf["examples_dir"], + gallery_conf["gallery_dir"], + gallery_conf, + set(), + is_subsection=False, ) # read rst file and check if it contains code output rst_fname = os.path.splitext(fname)[0] + ".rst" diff --git a/sphinx_gallery/tests/tinybuild/doc/conf.py b/sphinx_gallery/tests/tinybuild/doc/conf.py index 1d1455f0e..06999043e 100644 --- a/sphinx_gallery/tests/tinybuild/doc/conf.py +++ b/sphinx_gallery/tests/tinybuild/doc/conf.py @@ -79,7 +79,7 @@ "../examples/future/plot_future_imports_broken.py", "../examples/plot_scraper_broken.py", ], - "show_memory": True, + "show_memory": False, "compress_images": ("images", "thumbnails"), "junit": op.join("sphinx-gallery", "junit-results.xml"), "matplotlib_animations": (True, "mp4"), @@ -89,6 +89,7 @@ "show_api_usage": True, "copyfile_regex": r".*\.rst", "recommender": {"enable": True, "n_examples": 3}, + "parallel": 2, } nitpicky = True highlight_language = "python3" diff --git a/sphinx_gallery/utils.py b/sphinx_gallery/utils.py 
index 21fc5fc3d..0c7e37a38 100644 --- a/sphinx_gallery/utils.py +++ b/sphinx_gallery/utils.py @@ -7,10 +7,12 @@ # License: 3-clause BSD import hashlib +import json import os import re import subprocess import zipfile +from functools import partial from pathlib import Path from shutil import copyfile, move @@ -26,6 +28,10 @@ logger = sphinx.util.logging.getLogger("sphinx-gallery") +# Text writing kwargs for builtins.open +_W_KW = dict(encoding="utf-8", newline="\n") + + def _get_image(): try: from PIL import Image @@ -140,14 +146,15 @@ def get_md5sum(src_file, mode="b"): kwargs = {"errors": "surrogateescape", "encoding": "utf-8"} else: kwargs = {} - with open(src_file, "r" + mode, **kwargs) as src_data: + # Universal newline mode is intentional here + with open(src_file, f"r{mode}", **kwargs) as src_data: src_content = src_data.read() if mode == "t": src_content = src_content.encode(**kwargs) return hashlib.md5(src_content).hexdigest() -def _replace_md5(fname_new, fname_old=None, method="move", mode="b"): +def _replace_md5(fname_new, fname_old=None, *, method="move", mode="b", check="md5"): fname_new = str(fname_new) # convert possible Path assert method in ("move", "copy") if fname_old is None: @@ -155,7 +162,19 @@ def _replace_md5(fname_new, fname_old=None, method="move", mode="b"): fname_old = os.path.splitext(fname_new)[0] replace = True if os.path.isfile(fname_old): - if get_md5sum(fname_old, mode) == get_md5sum(fname_new, mode): + if check == "md5": # default + func = partial(get_md5sum, mode=mode) + else: + assert check == "json" + + def func(x): + return json.loads(Path(x).read_text("utf-8")) + + try: + equiv = func(fname_old) == func(fname_new) + except Exception: # e.g., old JSON file is a problem + equiv = False + if equiv: replace = False if method == "move": os.remove(fname_new)
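For illustration only (not part of the patch): a sketch of how the private helpers
introduced above fit together when writing the new ``.codeobj.json`` files, assuming
the patched ``sphinx_gallery.utils`` is importable; the file name and payload here
are hypothetical::

    import json

    from sphinx_gallery.utils import _W_KW, _replace_md5

    payload = {"plt.plot": [{"name": "plot", "module": "matplotlib.pyplot",
                             "module_short": "matplotlib.pyplot",
                             "is_class": False, "is_explicit": False}]}
    # Write to a ".new" sibling first (UTF-8, "\n" newlines via _W_KW) ...
    with open("plot_example.codeobj.json.new", "w", **_W_KW) as fid:
        json.dump(payload, fid, sort_keys=True, ensure_ascii=False, indent=1)
    # ... then swap it into place only when the parsed JSON differs from the
    # existing file, so unchanged outputs keep their mtime across rebuilds.
    _replace_md5("plot_example.codeobj.json.new", check="json")

Because ``check="json"`` compares the parsed objects rather than raw bytes,
differences in key order or whitespace between writes do not force a replacement.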