diff --git a/doc/users/whats_new/reproducible_ps_pdf.rst b/doc/users/whats_new/reproducible_ps_pdf.rst new file mode 100644 index 000000000000..2e8294f1e414 --- /dev/null +++ b/doc/users/whats_new/reproducible_ps_pdf.rst @@ -0,0 +1,14 @@ +Reproducible PS and PDF output +------------------------------ + +The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set +the timestamp value in the PS and PDF outputs. See +https://reproducible-builds.org/specs/source-date-epoch/ + +The reproducibility of the output from the PS and PDF backends has so +far been tested using various plot elements but only default values of +options such as ``{ps,pdf}.fonttype`` that can affect the output at a +low level, and not with the mathtext or usetex features. When +matplotlib calls external tools (such as PS distillers or LaTeX) their +versions need to be kept constant for reproducibility, and they may +add sources of nondeterminism outside the control of matplotlib. diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py index 21719273c423..cedfe29e8b2a 100644 --- a/lib/matplotlib/backends/backend_pdf.py +++ b/lib/matplotlib/backends/backend_pdf.py @@ -17,6 +17,7 @@ import time import warnings import zlib +import collections from io import BytesIO from functools import total_ordering @@ -24,7 +25,7 @@ from six import unichr -from datetime import datetime +from datetime import datetime, tzinfo, timedelta from math import ceil, cos, floor, pi, sin import matplotlib @@ -45,6 +46,7 @@ from matplotlib.mathtext import MathTextParser from matplotlib.transforms import Affine2D, BboxBase from matplotlib.path import Path +from matplotlib.dates import UTC from matplotlib import _path from matplotlib import _png from matplotlib import ttconv @@ -202,10 +204,14 @@ def pdfRepr(obj): # A date. 
elif isinstance(obj, datetime): r = obj.strftime('D:%Y%m%d%H%M%S') - if time.daylight: - z = time.altzone + z = obj.utcoffset() + if z is not None: + z = z.seconds else: - z = time.timezone + if time.daylight: + z = time.altzone + else: + z = time.timezone if z == 0: r += 'Z' elif z < 0: @@ -468,10 +474,19 @@ def __init__(self, filename): self.writeObject(self.rootObject, root) revision = '' + # get source date from SOURCE_DATE_EPOCH, if set + # See https://reproducible-builds.org/specs/source-date-epoch/ + source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") + if source_date_epoch: + source_date = datetime.utcfromtimestamp(int(source_date_epoch)) + source_date = source_date.replace(tzinfo=UTC) + else: + source_date = datetime.today() + self.infoDict = { 'Creator': 'matplotlib %s, http://matplotlib.org' % __version__, 'Producer': 'matplotlib pdf backend%s' % revision, - 'CreationDate': datetime.today() + 'CreationDate': source_date } self.fontNames = {} # maps filenames to internal font names @@ -483,14 +498,15 @@ def __init__(self, filename): self.alphaStates = {} # maps alpha values to graphics state objects self.nextAlphaState = 1 - self.hatchPatterns = {} + # reproducible writeHatches needs an ordered dict: + self.hatchPatterns = collections.OrderedDict() self.nextHatch = 1 self.gouraudTriangles = [] - self._images = {} + self._images = collections.OrderedDict() # reproducible writeImages self.nextImage = 1 - self.markers = {} + self.markers = collections.OrderedDict() # reproducible writeMarkers self.multi_byte_charprocs = {} self.paths = [] @@ -640,7 +656,8 @@ def fontName(self, fontprop): def writeFonts(self): fonts = {} - for filename, Fx in six.iteritems(self.fontNames): + for filename in sorted(self.fontNames): + Fx = self.fontNames[filename] matplotlib.verbose.report('Embedding font %s' % filename, 'debug') if filename.endswith('.afm'): # from pdf.use14corefonts @@ -920,7 +937,8 @@ def get_char_width(charcode): rawcharprocs = ttconv.get_pdf_charprocs( 
filename.encode(sys.getfilesystemencoding()), glyph_ids) charprocs = {} - for charname, stream in six.iteritems(rawcharprocs): + for charname in sorted(rawcharprocs): + stream = rawcharprocs[charname] charprocDict = {'Length': len(stream)} # The 2-byte characters are used as XObjects, so they # need extra info in their dictionary diff --git a/lib/matplotlib/backends/backend_ps.py b/lib/matplotlib/backends/backend_ps.py index b5d284cfffbd..6769e86659f5 100644 --- a/lib/matplotlib/backends/backend_ps.py +++ b/lib/matplotlib/backends/backend_ps.py @@ -8,7 +8,7 @@ import six from six.moves import StringIO -import glob, math, os, shutil, sys, time +import glob, math, os, shutil, sys, time, datetime def _fn_name(): return sys._getframe(1).f_code.co_name import io @@ -1087,7 +1087,15 @@ def print_figure_impl(): if title: print("%%Title: "+title, file=fh) print(("%%Creator: matplotlib version " +__version__+", http://matplotlib.org/"), file=fh) - print("%%CreationDate: "+time.ctime(time.time()), file=fh) + # get source date from SOURCE_DATE_EPOCH, if set + # See https://reproducible-builds.org/specs/source-date-epoch/ + source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") + if source_date_epoch: + source_date = datetime.datetime.utcfromtimestamp( + int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y") + else: + source_date = time.ctime() + print("%%CreationDate: "+source_date, file=fh) print("%%Orientation: " + orientation, file=fh) if not isEPSF: print("%%DocumentPaperSizes: "+papertype, file=fh) print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh) @@ -1270,7 +1278,15 @@ def write(self, *kl, **kwargs): if title: print("%%Title: "+title, file=fh) print(("%%Creator: matplotlib version " +__version__+", http://matplotlib.org/"), file=fh) - print("%%CreationDate: "+time.ctime(time.time()), file=fh) + # get source date from SOURCE_DATE_EPOCH, if set + # See https://reproducible-builds.org/specs/source-date-epoch/ + source_date_epoch = os.getenv("SOURCE_DATE_EPOCH") + if 
source_date_epoch: + source_date = datetime.datetime.utcfromtimestamp( + int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y") + else: + source_date = time.ctime() + print("%%CreationDate: "+source_date, file=fh) print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh) print("%%EndComments", file=fh) diff --git a/lib/matplotlib/testing/determinism.py b/lib/matplotlib/testing/determinism.py new file mode 100644 index 000000000000..07b5e831451f --- /dev/null +++ b/lib/matplotlib/testing/determinism.py @@ -0,0 +1,144 @@ +""" +Provides utilities to test output reproducibility. +""" + +from __future__ import (absolute_import, division, print_function, + unicode_literals) + +import six + +import io +import os +import re +import sys +from subprocess import check_output + +import matplotlib +from matplotlib import pyplot as plt + +from nose.plugins.skip import SkipTest + + +def _determinism_save(objects='mhi', format="pdf", usetex=False): + # save current value of SOURCE_DATE_EPOCH and set it + # to a constant value, so that time difference is not + # taken into account + sde = os.environ.pop('SOURCE_DATE_EPOCH', None) + os.environ['SOURCE_DATE_EPOCH'] = "946684800" + + matplotlib.rcParams['text.usetex'] = usetex + + fig = plt.figure() + + if 'm' in objects: + # use different markers... 
+ ax1 = fig.add_subplot(1, 6, 1) + x = range(10) + ax1.plot(x, [1] * 10, marker=u'D') + ax1.plot(x, [2] * 10, marker=u'x') + ax1.plot(x, [3] * 10, marker=u'^') + ax1.plot(x, [4] * 10, marker=u'H') + ax1.plot(x, [5] * 10, marker=u'v') + + if 'h' in objects: + # also use different hatch patterns + ax2 = fig.add_subplot(1, 6, 2) + bars = ax2.bar(range(1, 5), range(1, 5)) + \ + ax2.bar(range(1, 5), [6] * 4, bottom=range(1, 5)) + ax2.set_xticks([1.5, 2.5, 3.5, 4.5]) + + patterns = ('-', '+', 'x', '\\', '*', 'o', 'O', '.') + for bar, pattern in zip(bars, patterns): + bar.set_hatch(pattern) + + if 'i' in objects: + # also use different images + A = [[1, 2, 3], [2, 3, 1], [3, 1, 2]] + fig.add_subplot(1, 6, 3).imshow(A, interpolation='nearest') + A = [[1, 3, 2], [1, 2, 3], [3, 1, 2]] + fig.add_subplot(1, 6, 4).imshow(A, interpolation='bilinear') + A = [[2, 3, 1], [1, 2, 3], [2, 1, 3]] + fig.add_subplot(1, 6, 5).imshow(A, interpolation='bicubic') + + x = range(5) + fig.add_subplot(1, 6, 6).plot(x, x) + + if six.PY2 and format == 'ps': + stdout = io.StringIO() + else: + stdout = getattr(sys.stdout, 'buffer', sys.stdout) + fig.savefig(stdout, format=format) + if six.PY2 and format == 'ps': + sys.stdout.write(stdout.getvalue()) + + # Restores SOURCE_DATE_EPOCH + if sde is None: + os.environ.pop('SOURCE_DATE_EPOCH', None) + else: + os.environ['SOURCE_DATE_EPOCH'] = sde + + +def _determinism_check(objects='mhi', format="pdf", usetex=False): + """ + Output the same graphs three times and check that the outputs are exactly + the same. + + Parameters + ---------- + objects : str + contains characters corresponding to objects to be included in the test + document: 'm' for markers, 'h' for hatch patterns, 'i' for images. The + default value is "mhi", so that the test includes all these objects. + format : str + format string. The default value is "pdf". 
+ """ + from nose.tools import assert_equal + plots = [] + for i in range(3): + result = check_output([sys.executable, '-R', '-c', + 'import matplotlib; ' + 'matplotlib.use(%r); ' + 'from matplotlib.testing.determinism ' + 'import _determinism_save;' + '_determinism_save(%r,%r,%r)' + % (format, objects, format, usetex)]) + plots.append(result) + for p in plots[1:]: + if usetex: + if p != plots[0]: + raise SkipTest("failed, maybe due to ghostscript timestamps") + else: + assert_equal(p, plots[0]) + + +def _determinism_source_date_epoch(format, string, keyword=b"CreationDate"): + """ + Test SOURCE_DATE_EPOCH support. Output a document with the environment + variable SOURCE_DATE_EPOCH set to 2000-01-01 00:00 UTC and check that the + document contains the timestamp that corresponds to this date (given as an + argument). + + Parameters + ---------- + format : str + format string, such as "pdf". + string : str + timestamp string for 2000-01-01 00:00 UTC. + keyword : bytes + a string to look at when searching for the timestamp in the document + (used in case the test fails). + """ + buff = check_output([sys.executable, '-R', '-c', + 'import matplotlib; ' + 'matplotlib.use(%r); ' + 'from matplotlib.testing.determinism ' + 'import _determinism_save;' + '_determinism_save(%r,%r)' + % (format, "", format)]) + find_keyword = re.compile(b".*" + keyword + b".*") + key = find_keyword.search(buff) + if key: + print(key.group()) + else: + print("Timestamp keyword (%s) not found!" 
% keyword) + assert string in buff diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py index 2feee6fb1238..dd4dc95aca1a 100644 --- a/lib/matplotlib/tests/test_backend_pdf.py +++ b/lib/matplotlib/tests/test_backend_pdf.py @@ -12,6 +12,8 @@ from matplotlib import cm, rcParams from matplotlib.backends.backend_pdf import PdfPages from matplotlib import pyplot as plt +from matplotlib.testing.determinism import (_determinism_source_date_epoch, + _determinism_check) from matplotlib.testing.decorators import (image_comparison, knownfailureif, cleanup) @@ -90,8 +92,8 @@ def test_multipage_keep_empty(): @cleanup def test_composite_image(): - #Test that figures can be saved with and without combining multiple images - #(on a single set of axes) into a single composite image. + # Test that figures can be saved with and without combining multiple images + # (on a single set of axes) into a single composite image. X, Y = np.meshgrid(np.arange(-5, 5, 1), np.arange(-5, 5, 1)) Z = np.sin(Y ** 2) fig = plt.figure() @@ -109,6 +111,42 @@ def test_composite_image(): assert len(pdf._file._images.keys()) == 2 +@cleanup +def test_source_date_epoch(): + """Test SOURCE_DATE_EPOCH support for PDF output""" + _determinism_source_date_epoch("pdf", b"/CreationDate (D:20000101000000Z)") + + +@cleanup +def test_determinism_plain(): + """Test for reproducible PDF output: simple figure""" + _determinism_check('', format="pdf") + + +@cleanup +def test_determinism_images(): + """Test for reproducible PDF output: figure with different images""" + _determinism_check('i', format="pdf") + + +@cleanup +def test_determinism_hatches(): + """Test for reproducible PDF output: figure with different hatches""" + _determinism_check('h', format="pdf") + + +@cleanup +def test_determinism_markers(): + """Test for reproducible PDF output: figure with different markers""" + _determinism_check('m', format="pdf") + + +@cleanup +def test_determinism_all(): + """Test for 
reproducible PDF output""" + _determinism_check(format="pdf") + + @image_comparison(baseline_images=['hatching_legend'], extensions=['pdf']) def test_hatching_legend(): diff --git a/lib/matplotlib/tests/test_backend_ps.py b/lib/matplotlib/tests/test_backend_ps.py index f017164ff7bb..7797deb01669 100644 --- a/lib/matplotlib/tests/test_backend_ps.py +++ b/lib/matplotlib/tests/test_backend_ps.py @@ -11,6 +11,8 @@ import matplotlib import matplotlib.pyplot as plt from matplotlib import patheffects +from matplotlib.testing.determinism import (_determinism_source_date_epoch, + _determinism_check) from matplotlib.testing.decorators import cleanup, knownfailureif @@ -160,7 +162,7 @@ def test_tilde_in_tempfilename(): plt.rc('text', usetex=True) plt.plot([1, 2, 3, 4]) plt.xlabel(r'\textbf{time} (s)') - #matplotlib.verbose.set_level("debug") + # matplotlib.verbose.set_level("debug") output_eps = os.path.join(base_tempdir, 'tex_demo.eps') # use the PS backend to write the file... plt.savefig(output_eps, format="ps") @@ -174,6 +176,31 @@ def test_tilde_in_tempfilename(): print(e) +@cleanup +def test_source_date_epoch(): + """Test SOURCE_DATE_EPOCH support for PS output""" + # SOURCE_DATE_EPOCH support is not tested with text.usetex, + # because the produced timestamp comes from ghostscript: + # %%CreationDate: D:20000101000000Z00\'00\', and this could change + # with another ghostscript version. + _determinism_source_date_epoch( + "ps", b"%%CreationDate: Sat Jan 01 00:00:00 2000") + + +@cleanup +def test_determinism_all(): + """Test for reproducible PS output""" + _determinism_check(format="ps") + + +@cleanup +@needs_tex +@needs_ghostscript +def test_determinism_all_tex(): + """Test for reproducible PS/tex output""" + _determinism_check(format="ps", usetex=True) + + if __name__ == '__main__': import nose nose.runmodule(argv=['-s', '--with-doctest'], exit=False)