From 138df72cf75dc7cd947137bfda24995dd24d3e88 Mon Sep 17 00:00:00 2001 From: Antony Lee Date: Fri, 13 Dec 2019 15:25:41 +0100 Subject: [PATCH] Use test cache for test result images too. For image comparison tests in svg/pdf/ps formats, the result images are converted to png for comparison. Previously the conversion results were cached for the baseline images, but not for the test-generated images (because of non-deterministic svg/pdf/etc. results, due to hash-salting, dict ordering, etc.). Now that the test-generated images are generally deterministic, we can enable the cache for test-generated images as well. This speeds up `pytest -k '[svg]'` by ~30% (81s initially -> 55s on a seeded cache) and `pytest -k '[pdf]'` by ~10% (62s -> 55s) (there are too few (e)ps image comparison tests to see an effect). Also add logging regarding the cache which may help troubleshooting determinacy problems. A simple cache eviction mechanism prevents the cache from growing without bounds, limiting it to 2x the size of the baseline_images directory. This is a much simpler version of PR7764, which added more sophisticated reporting of cache hits and misses and cache eviction. 
--- lib/matplotlib/testing/compare.py | 36 ++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/lib/matplotlib/testing/compare.py b/lib/matplotlib/testing/compare.py index 06f2074e0ef7..f3a5e7217d81 100644 --- a/lib/matplotlib/testing/compare.py +++ b/lib/matplotlib/testing/compare.py @@ -3,7 +3,9 @@ """ import atexit +import functools import hashlib +import logging import os from pathlib import Path import re @@ -19,6 +21,8 @@ from matplotlib import cbook from matplotlib.testing.exceptions import ImageComparisonFailure +_log = logging.getLogger(__name__) + __all__ = ['compare_images', 'comparable_formats'] @@ -285,20 +289,50 @@ def convert(filename, cache): cache_dir = Path(get_cache_dir()) if cache else None if cache_dir is not None: + _register_conversion_cache_cleaner_once() hash_value = get_file_hash(path) cached_path = cache_dir / (hash_value + newpath.suffix) if cached_path.exists(): + _log.debug("For %s: reusing cached conversion.", filename) shutil.copyfile(cached_path, newpath) return str(newpath) + _log.debug("For %s: converting to png.", filename) converter[path.suffix[1:]](path, newpath) if cache_dir is not None: + _log.debug("For %s: caching conversion result.", filename) shutil.copyfile(newpath, cached_path) return str(newpath) +def _clean_conversion_cache(): + # This will actually ignore mpl_toolkits baseline images, but they're + # relatively small. + baseline_images_size = sum( + path.stat().st_size + for path in Path(mpl.__file__).parent.glob("**/baseline_images/**/*")) + # 2x: one full copy of baselines, and one full copy of test results + # (actually an overestimate: we don't convert png baselines and results). + max_cache_size = 2 * baseline_images_size + # Reduce cache until it fits. + cache_stat = { + path: path.stat() for path in Path(get_cache_dir()).glob("*")} + cache_size = sum(stat.st_size for stat in cache_stat.values()) + paths_by_atime = sorted( # Oldest at the end. 
+ cache_stat, key=lambda path: cache_stat[path].st_atime, reverse=True) + while cache_size > max_cache_size: + path = paths_by_atime.pop() + cache_size -= cache_stat[path].st_size + path.unlink() + + +@functools.lru_cache() # Ensure this is only registered once. +def _register_conversion_cache_cleaner_once(): + atexit.register(_clean_conversion_cache) + + def crop_to_same(actual_path, actual_image, expected_path, expected_image): # clip the images to the same size -- this is useful only when # comparing eps to pdf @@ -387,7 +421,7 @@ def compare_images(expected, actual, tol, in_decorator=False): raise IOError('Baseline image %r does not exist.' % expected) extension = expected.split('.')[-1] if extension != 'png': - actual = convert(actual, cache=False) + actual = convert(actual, cache=True) expected = convert(expected, cache=True) # open the image files and remove the alpha channel (if it exists)