Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 6c9146e

Browse files
committed
Factor the converted-image cache out of compare.py
There is a cache of png files keyed by the MD5 hashes of corresponding svg and pdf files, which helps reduce test suite running times for svg and pdf files that stay exactly the same from one run to the next. This patch enables caching of test results, not only expected results, which is only useful if the tests are mostly deterministic (see #7748). It adds reporting of cache misses, which can be helpful in getting tests to stay deterministic, and expiration since the test results are going to change more often than the expected results.
1 parent 1fa4dd7 commit 6c9146e

File tree

7 files changed

+413
-68
lines changed

7 files changed

+413
-68
lines changed

conftest.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
matplotlib.use('agg')
1111

1212
from matplotlib import default_test_modules
13+
from matplotlib.testing import conversion_cache as ccache
1314

1415

1516
IGNORED_TESTS = {
@@ -62,6 +63,11 @@ def pytest_addoption(parser):
6263

6364
group.addoption('--no-pep8', action='store_true',
6465
help='skip PEP8 compliance tests')
66+
group.addoption("--conversion-cache-max-size", action="store",
67+
help="conversion cache maximum size in bytes")
68+
group.addoption("--conversion-cache-report-misses",
69+
action="store_true",
70+
help="report conversion cache misses")
6571

6672

6773
def pytest_configure(config):
@@ -71,12 +77,30 @@ def pytest_configure(config):
7177
if config.getoption('--no-pep8'):
7278
default_test_modules.remove('matplotlib.tests.test_coding_standards')
7379
IGNORED_TESTS['matplotlib'] += 'test_coding_standards'
80+
max_size = config.getoption('--conversion-cache-max-size')
81+
if max_size is not None:
82+
ccache.conversion_cache = \
83+
ccache.ConversionCache(max_size=int(max_size))
84+
else:
85+
ccache.conversion_cache = ccache.ConversionCache()
7486

7587

7688
def pytest_unconfigure(config):
    """Trim the image conversion cache and clear the pytest marker flag.

    Parameters
    ----------
    config : pytest config object
        Supplied by pytest; not used here.
    """
    try:
        # The module-level cache is None until pytest_configure has
        # installed one, so there may be nothing to expire.
        if ccache.conversion_cache is not None:
            ccache.conversion_cache.expire()
    finally:
        # Reset the flag even if trimming the cache failed.
        matplotlib._called_from_pytest = False
7891

7992

93+
def pytest_terminal_summary(terminalreporter):
    """Add an image conversion cache section to pytest's terminal summary.

    Writes the number of cache hits out of the number of cache queries
    and, when ``--conversion-cache-report-misses`` was given, the sorted
    list of files that were queried but not found in the cache.

    Parameters
    ----------
    terminalreporter : pytest terminal reporter
        Object providing ``write_sep``, ``write_line`` and ``config``.
    """
    cache = ccache.conversion_cache
    if cache is None:
        # No cache was installed (the module default), nothing to report.
        return

    data = cache.report()
    gets = data['gets']
    hits = data['hits']

    terminalreporter.write_sep('-', 'Image conversion cache report')
    terminalreporter.write_line('Hit rate: %d/%d' % (len(hits), len(gets)))

    report_misses = terminalreporter.config.getoption(
        '--conversion-cache-report-misses')
    if report_misses:
        terminalreporter.write_line('Missed files:')
        for filename in sorted(gets.difference(hits)):
            terminalreporter.write_line(' %s' % filename)
102+
103+
80104
def pytest_ignore_collect(path, config):
81105
if path.ext == '.py':
82106
collect_filter = config.getoption('--collect-filter')

lib/matplotlib/testing/compare.py

Lines changed: 19 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,14 @@
55
from __future__ import (absolute_import, division, print_function,
66
unicode_literals)
77

8-
import six
9-
10-
import hashlib
118
import os
12-
import shutil
139

1410
import numpy as np
1511

1612
import matplotlib
1713
from matplotlib.compat import subprocess
1814
from matplotlib.testing.exceptions import ImageComparisonFailure
1915
from matplotlib import _png
20-
from matplotlib import _get_cachedir
21-
from matplotlib import cbook
22-
from distutils import version
2316

2417
__all__ = ['compare_float', 'compare_images', 'comparable_formats']
2518

@@ -76,40 +69,6 @@ def compare_float(expected, actual, relTol=None, absTol=None):
7669
return msg or None
7770

7871

79-
def get_cache_dir():
80-
cachedir = _get_cachedir()
81-
if cachedir is None:
82-
raise RuntimeError('Could not find a suitable configuration directory')
83-
cache_dir = os.path.join(cachedir, 'test_cache')
84-
if not os.path.exists(cache_dir):
85-
try:
86-
cbook.mkdirs(cache_dir)
87-
except IOError:
88-
return None
89-
if not os.access(cache_dir, os.W_OK):
90-
return None
91-
return cache_dir
92-
93-
94-
def get_file_hash(path, block_size=2 ** 20):
95-
md5 = hashlib.md5()
96-
with open(path, 'rb') as fd:
97-
while True:
98-
data = fd.read(block_size)
99-
if not data:
100-
break
101-
md5.update(data)
102-
103-
if path.endswith('.pdf'):
104-
from matplotlib import checkdep_ghostscript
105-
md5.update(checkdep_ghostscript()[1].encode('utf-8'))
106-
elif path.endswith('.svg'):
107-
from matplotlib import checkdep_inkscape
108-
md5.update(checkdep_inkscape().encode('utf-8'))
109-
110-
return md5.hexdigest()
111-
112-
11372
def make_external_conversion_command(cmd):
11473
def convert(old, new):
11574
cmdline = cmd(old, new)
@@ -160,16 +119,20 @@ def comparable_formats():
160119
return ['png'] + list(converter)
161120

162121

163-
def convert(filename, cache):
122+
def convert(filename, cache=None):
164123
"""
165124
Convert the named file into a png file. Returns the name of the
166125
created file.
167126
168-
If *cache* is True, the result of the conversion is cached in
169-
`matplotlib._get_cachedir() + '/test_cache/'`. The caching is based
170-
on a hash of the exact contents of the input file. The is no limit
171-
on the size of the cache, so it may need to be manually cleared
172-
periodically.
127+
Parameters
128+
----------
129+
filename : str
130+
cache : ConversionCache, optional
131+
132+
Returns
133+
-------
134+
str
135+
The converted file.
173136
174137
"""
175138
base, extension = filename.rsplit('.', 1)
@@ -184,23 +147,12 @@ def convert(filename, cache):
184147
# is out of date.
185148
if (not os.path.exists(newname) or
186149
os.stat(newname).st_mtime < os.stat(filename).st_mtime):
187-
if cache:
188-
cache_dir = get_cache_dir()
189-
else:
190-
cache_dir = None
191-
192-
if cache_dir is not None:
193-
hash_value = get_file_hash(filename)
194-
new_ext = os.path.splitext(newname)[1]
195-
cached_file = os.path.join(cache_dir, hash_value + new_ext)
196-
if os.path.exists(cached_file):
197-
shutil.copyfile(cached_file, newname)
198-
return newname
199-
150+
in_cache = cache and cache.get(filename, newname)
151+
if in_cache:
152+
return newname
200153
converter[extension](filename, newname)
201-
202-
if cache_dir is not None:
203-
shutil.copyfile(newname, cached_file)
154+
if cache:
155+
cache.put(filename, newname)
204156

205157
return newname
206158

@@ -262,7 +214,7 @@ def calculate_rms(expectedImage, actualImage):
262214
return rms
263215

264216

265-
def compare_images(expected, actual, tol, in_decorator=False):
217+
def compare_images(expected, actual, tol, in_decorator=False, cache=None):
266218
"""
267219
Compare two "image" files checking differences within a tolerance.
268220
@@ -283,6 +235,7 @@ def compare_images(expected, actual, tol, in_decorator=False):
283235
in_decorator : bool
284236
If called from image_comparison decorator, this should be
285237
True. (default=False)
238+
cache : cache.ConversionCache, optional
286239
287240
Example
288241
-------
@@ -308,8 +261,8 @@ def compare_images(expected, actual, tol, in_decorator=False):
308261
raise IOError('Baseline image %r does not exist.' % expected)
309262

310263
if extension != 'png':
311-
actual = convert(actual, False)
312-
expected = convert(expected, True)
264+
actual = convert(actual, cache)
265+
expected = convert(expected, cache)
313266

314267
# open the image files and remove the alpha channel (if it exists)
315268
expectedImage = _png.read_png_int(expected)
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""
2+
A cache of png files keyed by the MD5 hashes of corresponding svg and
3+
pdf files, to reduce test suite running times for svg and pdf files
4+
that stay exactly the same from one run to the next.
5+
6+
There is a corresponding nose plugin in testing/nose/plugins and
7+
similar pytest code in conftest.py.
8+
"""
9+
10+
from __future__ import (absolute_import, division, print_function,
11+
unicode_literals)
12+
13+
import hashlib
14+
import shutil
15+
import os
16+
import warnings
17+
18+
from matplotlib import _get_cachedir
19+
from matplotlib import cbook
20+
from matplotlib import checkdep_ghostscript
21+
from matplotlib import checkdep_inkscape
22+
23+
24+
class ConversionCache(object):
    """A cache that stores png files converted from svg or pdf formats.

    The image comparison test cases compare svg and pdf files by
    converting them to png files. When a test case has produced a
    file, e.g. result.pdf, it queries this cache by the pathname
    '/path/to/result_images/result.pdf'. The cache computes a hash of
    the file (and the version of the external software used to convert
    the file) and if a result by that hash value is available, writes
    the data to the output location supplied by the caller. Otherwise
    the test case has to run the conversion and can then insert the
    result into the cache.

    Parameters
    ----------
    directory : str, optional
        Files are stored in this directory, defaults to `'test_cache'` in
        the overall Matplotlib cache directory.
    max_size : int, optional
        The `expire` method will delete files until their combined size
        is under this limit, in bytes. Defaults to 100 megabytes.

    Raises
    ------
    ValueError
        If `max_size` is not an int.
    CacheError
        If the cache directory cannot be determined, created or written.
    """

    def __init__(self, directory=None, max_size=int(1e8)):
        # Validate first so that a bad argument does not leave a freshly
        # created directory behind.
        if not isinstance(max_size, int):
            raise ValueError("max_size is %s, expected int" % type(max_size))

        self.gets = set()   # every filename passed to get()
        self.hits = set()   # the subset of those found in the cache
        if directory is not None:
            self.cachedir = directory
        else:
            self.cachedir = self.get_cache_dir()
        self.ensure_cache_dir()
        self.max_size = max_size
        self.cached_ext = '.png'

        # Version tags of the external converters, keyed by the source
        # file extension; they are mixed into the file hashes.
        self.converter_version = {}
        probes = (('.pdf', lambda: checkdep_ghostscript()[1]),
                  ('.svg', lambda: checkdep_inkscape()))
        for ext, probe in probes:
            try:
                version = probe()
                if version is not None:
                    self.converter_version[ext] = version.encode('utf-8')
            except Exception:
                # Best effort: without a version tag the cache still
                # works, _get_file_hash merely warns about it.
                pass

        # path -> ((mtime, size), hexdigest), see _get_file_hash.
        self.hash_cache = {}

    def _cached_path(self, original):
        """Return the path inside the cache directory used for *original*."""
        hash_value = self._get_file_hash(original)
        return os.path.join(self.cachedir, hash_value + self.cached_ext)

    def get(self, filename, newname):
        """Query the cache.

        Parameters
        ----------
        filename : str
            Full path to the original file.
        newname : str
            Path to which the result should be written.

        Returns
        -------
        bool
            True if the file was found in the cache and is now written
            to `newname`.
        """
        self.gets.add(filename)
        cached_file = self._cached_path(filename)
        if not os.path.exists(cached_file):
            return False
        shutil.copyfile(cached_file, newname)
        self.hits.add(filename)
        return True

    def put(self, original, converted):
        """Insert a file into the cache.

        Parameters
        ----------
        original : str
            Full path to the original file.
        converted : str
            Full path to the png file converted from the original.
        """
        shutil.copyfile(converted, self._cached_path(original))

    def _get_file_hash(self, path, block_size=2 ** 20):
        """Return the hex MD5 digest of the file at *path*.

        The digest covers the file contents followed by the version tag
        of the converter registered for the file's extension, if any.
        Results are memoized per path and recomputed when the file's
        modification time or size changes, so a file that is regenerated
        during the same run is not looked up under its old hash.
        """
        md5 = hashlib.md5()
        with open(path, 'rb') as fd:
            info = os.fstat(fd.fileno())
            stamp = (info.st_mtime, info.st_size)
            memo = self.hash_cache.get(path)
            if memo is not None and memo[0] == stamp:
                return memo[1]
            while True:
                data = fd.read(block_size)
                if not data:
                    break
                md5.update(data)

        _, ext = os.path.splitext(path)
        version_tag = self.converter_version.get(ext)
        if version_tag:
            md5.update(version_tag)
        else:
            warnings.warn(("Don't know the external converter for %s, cannot "
                           "ensure cache invalidation on version update.")
                          % path)

        result = md5.hexdigest()
        self.hash_cache[path] = (stamp, result)
        return result

    def report(self):
        """Return information about the cache.

        Returns
        -------
        r : dict
            `r['gets']` is the set of files queried,
            `r['hits']` is the set of files found in the cache
        """
        return dict(hits=self.hits, gets=self.gets)

    def expire(self):
        """Delete cached files until the disk usage is under the limit.

        Orders files by access time, so the least recently used files
        get deleted first. Only regular files directly inside the cache
        directory are counted and removed; files that disappear while
        this method runs are skipped.
        """
        entries = []
        usage = 0
        # A single directory listing is used for both the size total and
        # the deletion order, so the two can never disagree.
        for filename in os.listdir(self.cachedir):
            path = os.path.join(self.cachedir, filename)
            if not os.path.isfile(path):
                continue
            try:
                info = os.stat(path)
            except OSError:
                continue
            entries.append((info.st_atime, filename, info.st_size))
            usage += info.st_size

        to_free = usage - self.max_size
        if to_free <= 0:
            return

        entries.sort()  # oldest access time first
        for _, filename, size in entries:
            if to_free <= 0:
                break
            try:
                os.remove(os.path.join(self.cachedir, filename))
            except OSError:
                continue
            to_free -= size

    @staticmethod
    def get_cache_dir():
        """Return the default cache directory path.

        This is the `'test_cache'` entry under the directory returned by
        `_get_cachedir()`; the directory is not created here.

        Raises
        ------
        CacheError
            If `_get_cachedir()` returns None.
        """
        cachedir = _get_cachedir()
        if cachedir is None:
            raise CacheError('No suitable configuration directory')
        return os.path.join(cachedir, 'test_cache')

    def ensure_cache_dir(self):
        """Create the cache directory if needed and check it is writable.

        Raises
        ------
        CacheError
            If the directory cannot be created or is not writable.
        """
        if not os.path.exists(self.cachedir):
            try:
                cbook.mkdirs(self.cachedir)
            except (IOError, OSError) as e:
                # OSError is a distinct class on Python 2.
                raise CacheError("Error creating cache directory %s: %s"
                                 % (self.cachedir, str(e)))
        if not os.access(self.cachedir, os.W_OK):
            raise CacheError("Cache directory %s not writable" % self.cachedir)
185+
186+
187+
class CacheError(Exception):
    """Error locating, creating or writing to the cache directory.

    Parameters
    ----------
    message : str
        Human-readable description, also returned by ``str()``.
    """

    def __init__(self, message):
        super(CacheError, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message
193+
194+
195+
# A global cache instance, set by the appropriate test runner. It stays
# None until a runner assigns a ConversionCache to it.
conversion_cache = None

0 commit comments

Comments
 (0)