|
| 1 | +""" |
| 2 | +A cache of png files keyed by the MD5 hashes of corresponding svg and |
| 3 | +pdf files, to reduce test suite running times for svg and pdf files |
| 4 | +that stay exactly the same from one run to the next. |
| 5 | +
|
| 6 | +There is a corresponding nose plugin in testing/nose/plugins and |
| 7 | +similar pytest code in conftest.py. |
| 8 | +""" |
| 9 | + |
| 10 | +from __future__ import (absolute_import, division, print_function, |
| 11 | + unicode_literals) |
| 12 | + |
| 13 | +import hashlib |
| 14 | +import shutil |
| 15 | +import os |
| 16 | +import warnings |
| 17 | + |
| 18 | +from matplotlib import _get_cachedir |
| 19 | +from matplotlib import cbook |
| 20 | +from matplotlib import checkdep_ghostscript |
| 21 | +from matplotlib import checkdep_inkscape |
| 22 | + |
| 23 | + |
class ConversionCache(object):
    """A cache that stores png files converted from svg or pdf formats.

    The image comparison test cases compare svg and pdf files by
    converting them to png files. When a test case has produced a
    file, e.g. result.pdf, it queries this cache by the pathname
    '/path/to/result_images/result.pdf'. The cache computes a hash of
    the file (and the version of the external software used to convert
    the file) and if a result by that hash value is available, writes
    the data to the output location supplied by the caller. Otherwise
    the test case has to run the conversion and can then insert the
    result into the cache.

    Parameters
    ----------
    directory : str, optional
        Files are stored in this directory, defaults to `'test_cache'` in
        the overall Matplotlib cache directory.
    max_size : int, optional
        The expire method will delete files until their combined size is
        under this limit, in bytes. Defaults to 100 megabytes.

    Raises
    ------
    CacheError
        If no usable cache directory can be found or created.
    """

    def __init__(self, directory=None, max_size=int(1e8)):
        # Bookkeeping for report(): every queried file and every hit.
        self.gets = set()
        self.hits = set()
        if directory is not None:
            self.cachedir = directory
        else:
            self.cachedir = self.get_cache_dir()
        self.ensure_cache_dir()
        if not isinstance(max_size, int):
            raise ValueError("max_size is %s, expected int" % type(max_size))
        self.max_size = max_size
        self.cached_ext = '.png'
        # The converter version is mixed into the file hash so cached
        # results are invalidated when the external tool is upgraded.
        # Detection is best-effort: if a converter is missing its entry
        # is simply absent and _get_file_hash warns.  Catch Exception,
        # not everything, so KeyboardInterrupt/SystemExit still work.
        self.converter_version = {}
        try:
            self.converter_version['.pdf'] = \
                checkdep_ghostscript()[1].encode('utf-8')
        except Exception:
            pass
        try:
            self.converter_version['.svg'] = \
                checkdep_inkscape().encode('utf-8')
        except Exception:
            pass
        # Memoizes _get_file_hash per path for the lifetime of this object.
        self.hash_cache = {}

    def get(self, filename, newname):
        """Query the cache.

        Parameters
        ----------
        filename : str
            Full path to the original file.
        newname : str
            Path to which the result should be written.

        Returns
        -------
        bool
            True if the file was found in the cache and is now written
            to `newname`.
        """
        self.gets.add(filename)
        hash_value = self._get_file_hash(filename)
        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
        if os.path.exists(cached_file):
            shutil.copyfile(cached_file, newname)
            self.hits.add(filename)
            return True
        else:
            return False

    def put(self, original, converted):
        """Insert a file into the cache.

        Parameters
        ----------
        original : str
            Full path to the original file.
        converted : str
            Full path to the png file converted from the original.
        """
        hash_value = self._get_file_hash(original)
        cached_file = os.path.join(self.cachedir, hash_value + self.cached_ext)
        shutil.copyfile(converted, cached_file)

    def _get_file_hash(self, path, block_size=2 ** 20):
        """Compute and memoize the MD5 hex digest of the file at *path*.

        The digest covers the file contents plus the version string of
        the external converter for the file's extension (if known), so
        a converter upgrade changes the hash and invalidates old cache
        entries.
        """
        if path in self.hash_cache:
            return self.hash_cache[path]
        md5 = hashlib.md5()
        # Read in chunks so large files don't have to fit in memory.
        with open(path, 'rb') as fd:
            while True:
                data = fd.read(block_size)
                if not data:
                    break
                md5.update(data)
        _, ext = os.path.splitext(path)
        version_tag = self.converter_version.get(ext)
        if version_tag:
            md5.update(version_tag)
        else:
            warnings.warn(("Don't know the external converter for %s, cannot "
                           "ensure cache invalidation on version update.")
                          % path)

        result = md5.hexdigest()
        self.hash_cache[path] = result
        return result

    def report(self):
        """Return information about the cache.

        Returns
        -------
        r : dict
            `r['gets']` is the set of files queried,
            `r['hits']` is the set of files found in the cache
        """
        return dict(hits=self.hits, gets=self.gets)

    def expire(self):
        """Delete cached files until the disk usage is under the limit.

        Orders files by access time, so the least recently used files
        get deleted first.
        """
        stats = {filename: os.stat(os.path.join(self.cachedir, filename))
                 for filename in os.listdir(self.cachedir)}
        usage = sum(f.st_size for f in stats.values())
        to_free = usage - self.max_size
        if to_free <= 0:
            return
        # Most recently accessed first, so pop() yields the LRU file.
        # Reuse the stat listing instead of calling os.listdir again,
        # which could pick up files not present in `stats`.
        files = sorted(stats, key=lambda f: stats[f].st_atime, reverse=True)
        while to_free > 0 and files:
            filename = files.pop()
            os.remove(os.path.join(self.cachedir, filename))
            to_free -= stats[filename].st_size

    @staticmethod
    def get_cache_dir():
        """Return the default cache directory, raising CacheError if the
        Matplotlib cache directory cannot be determined."""
        cachedir = _get_cachedir()
        if cachedir is None:
            raise CacheError('No suitable configuration directory')
        cachedir = os.path.join(cachedir, 'test_cache')
        return cachedir

    def ensure_cache_dir(self):
        """Create self.cachedir if needed and verify it is writable.

        Raises CacheError on failure.  Directory-creation failures can
        surface as OSError (not only IOError on Python 2), so both are
        translated.
        """
        if not os.path.exists(self.cachedir):
            try:
                cbook.mkdirs(self.cachedir)
            except (IOError, OSError) as e:
                raise CacheError("Error creating cache directory %s: %s"
                                 % (self.cachedir, str(e)))
        if not os.access(self.cachedir, os.W_OK):
            raise CacheError("Cache directory %s not writable" % self.cachedir)
| 185 | + |
| 186 | + |
class CacheError(Exception):
    """Raised when the conversion cache directory cannot be set up or used."""

    def __init__(self, message):
        # Keep the text on an attribute so __str__ can echo it verbatim.
        self.message = message

    def __str__(self):
        # Report exactly the message supplied at construction time.
        return '%s' % (self.message,)
| 193 | + |
| 194 | + |
# A global cache instance, set by the appropriate test runner.
# Remains None until a runner installs a ConversionCache here.
conversion_cache = None
0 commit comments