Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit eadafc6

Browse files
authored
Merge pull request #6597 from JojoBoulix/reproducible-master
[MRG+1] Reproducible PS/PDF output (master)
2 parents 3235300 + 1a5ada6 commit eadafc6

File tree

6 files changed

+273
-16
lines changed

6 files changed

+273
-16
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
Reproducible PS and PDF output
2+
------------------------------
3+
4+
The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set
5+
the timestamp value in the PS and PDF outputs. See
6+
https://reproducible-builds.org/specs/source-date-epoch/
7+
8+
The reproducibility of the output from the PS and PDF backends has so
9+
far been tested using various plot elements but only default values of
10+
options such as ``{ps,pdf}.fonttype`` that can affect the output at a
11+
low level, and not with the mathtext or usetex features. When
12+
matplotlib calls external tools (such as PS distillers or LaTeX) their
13+
versions need to be kept constant for reproducibility, and they may
14+
add sources of nondeterminism outside the control of matplotlib.

lib/matplotlib/backends/backend_pdf.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
import time
1818
import warnings
1919
import zlib
20+
import collections
2021
from io import BytesIO
2122
from functools import total_ordering
2223

2324
import numpy as np
2425
from six import unichr
2526

2627

27-
from datetime import datetime
28+
from datetime import datetime, tzinfo, timedelta
2829
from math import ceil, cos, floor, pi, sin
2930

3031
import matplotlib
@@ -45,6 +46,7 @@
4546
from matplotlib.mathtext import MathTextParser
4647
from matplotlib.transforms import Affine2D, BboxBase
4748
from matplotlib.path import Path
49+
from matplotlib.dates import UTC
4850
from matplotlib import _path
4951
from matplotlib import _png
5052
from matplotlib import ttconv
@@ -202,10 +204,14 @@ def pdfRepr(obj):
202204
# A date.
203205
elif isinstance(obj, datetime):
204206
r = obj.strftime('D:%Y%m%d%H%M%S')
205-
if time.daylight:
206-
z = time.altzone
207+
z = obj.utcoffset()
208+
if z is not None:
209+
z = z.seconds
207210
else:
208-
z = time.timezone
211+
if time.daylight:
212+
z = time.altzone
213+
else:
214+
z = time.timezone
209215
if z == 0:
210216
r += 'Z'
211217
elif z < 0:
@@ -467,10 +473,19 @@ def __init__(self, filename):
467473
self.writeObject(self.rootObject, root)
468474

469475
revision = ''
476+
# get source date from SOURCE_DATE_EPOCH, if set
477+
# See https://reproducible-builds.org/specs/source-date-epoch/
478+
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
479+
if source_date_epoch:
480+
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
481+
source_date = source_date.replace(tzinfo=UTC)
482+
else:
483+
source_date = datetime.today()
484+
470485
self.infoDict = {
471486
'Creator': 'matplotlib %s, http://matplotlib.org' % __version__,
472487
'Producer': 'matplotlib pdf backend%s' % revision,
473-
'CreationDate': datetime.today()
488+
'CreationDate': source_date
474489
}
475490

476491
self.fontNames = {} # maps filenames to internal font names
@@ -482,14 +497,15 @@ def __init__(self, filename):
482497

483498
self.alphaStates = {} # maps alpha values to graphics state objects
484499
self.nextAlphaState = 1
485-
self.hatchPatterns = {}
500+
# reproducible writeHatches needs an ordered dict:
501+
self.hatchPatterns = collections.OrderedDict()
486502
self.nextHatch = 1
487503
self.gouraudTriangles = []
488504

489-
self._images = {}
505+
self._images = collections.OrderedDict() # reproducible writeImages
490506
self.nextImage = 1
491507

492-
self.markers = {}
508+
self.markers = collections.OrderedDict() # reproducible writeMarkers
493509
self.multi_byte_charprocs = {}
494510

495511
self.paths = []
@@ -640,7 +656,8 @@ def fontName(self, fontprop):
640656

641657
def writeFonts(self):
642658
fonts = {}
643-
for filename, Fx in six.iteritems(self.fontNames):
659+
for filename in sorted(self.fontNames):
660+
Fx = self.fontNames[filename]
644661
matplotlib.verbose.report('Embedding font %s' % filename, 'debug')
645662
if filename.endswith('.afm'):
646663
# from pdf.use14corefonts
@@ -920,7 +937,8 @@ def get_char_width(charcode):
920937
rawcharprocs = ttconv.get_pdf_charprocs(
921938
filename.encode(sys.getfilesystemencoding()), glyph_ids)
922939
charprocs = {}
923-
for charname, stream in six.iteritems(rawcharprocs):
940+
for charname in sorted(rawcharprocs):
941+
stream = rawcharprocs[charname]
924942
charprocDict = {'Length': len(stream)}
925943
# The 2-byte characters are used as XObjects, so they
926944
# need extra info in their dictionary

lib/matplotlib/backends/backend_ps.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import six
99
from six.moves import StringIO
1010

11-
import glob, math, os, shutil, sys, time
11+
import glob, math, os, shutil, sys, time, datetime
1212
def _fn_name(): return sys._getframe(1).f_code.co_name
1313
import io
1414

@@ -1083,7 +1083,15 @@ def print_figure_impl():
10831083
if title: print("%%Title: "+title, file=fh)
10841084
print(("%%Creator: matplotlib version "
10851085
+__version__+", http://matplotlib.org/"), file=fh)
1086-
print("%%CreationDate: "+time.ctime(time.time()), file=fh)
1086+
# get source date from SOURCE_DATE_EPOCH, if set
1087+
# See https://reproducible-builds.org/specs/source-date-epoch/
1088+
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
1089+
if source_date_epoch:
1090+
source_date = datetime.datetime.utcfromtimestamp(
1091+
int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y")
1092+
else:
1093+
source_date = time.ctime()
1094+
print("%%CreationDate: "+source_date, file=fh)
10871095
print("%%Orientation: " + orientation, file=fh)
10881096
if not isEPSF: print("%%DocumentPaperSizes: "+papertype, file=fh)
10891097
print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh)
@@ -1266,7 +1274,15 @@ def write(self, *kl, **kwargs):
12661274
if title: print("%%Title: "+title, file=fh)
12671275
print(("%%Creator: matplotlib version "
12681276
+__version__+", http://matplotlib.org/"), file=fh)
1269-
print("%%CreationDate: "+time.ctime(time.time()), file=fh)
1277+
# get source date from SOURCE_DATE_EPOCH, if set
1278+
# See https://reproducible-builds.org/specs/source-date-epoch/
1279+
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
1280+
if source_date_epoch:
1281+
source_date = datetime.datetime.utcfromtimestamp(
1282+
int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y")
1283+
else:
1284+
source_date = time.ctime()
1285+
print("%%CreationDate: "+source_date, file=fh)
12701286
print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh)
12711287
print("%%EndComments", file=fh)
12721288

lib/matplotlib/testing/determinism.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""
2+
Provides utilities to test output reproducibility.
3+
"""
4+
5+
from __future__ import (absolute_import, division, print_function,
6+
unicode_literals)
7+
8+
import six
9+
10+
import io
11+
import os
12+
import re
13+
import sys
14+
from subprocess import check_output
15+
16+
import matplotlib
17+
from matplotlib import pyplot as plt
18+
19+
from nose.plugins.skip import SkipTest
20+
21+
22+
def _determinism_save(objects='mhi', format="pdf", usetex=False):
    """
    Draw a fixed test figure and write it to standard output.

    ``SOURCE_DATE_EPOCH`` is forced to a constant value (946684800, i.e.
    2000-01-01 00:00 UTC) while the figure is produced, so that the time at
    which the function runs is not taken into account in the output. The
    previous value of the variable is restored afterwards, even if drawing
    or saving raises.

    Parameters
    ----------
    objects : str
        contains characters corresponding to objects to be included in the
        figure: 'm' for markers, 'h' for hatch patterns, 'i' for images.
    format : str
        format string passed to ``savefig``. The default value is "pdf".
    usetex : bool
        value assigned to the ``text.usetex`` rcParam before plotting.
    """
    # Remember the caller's SOURCE_DATE_EPOCH (None if it was unset) and
    # replace it with a constant timestamp.
    sde = os.environ.get('SOURCE_DATE_EPOCH')
    os.environ['SOURCE_DATE_EPOCH'] = "946684800"

    try:
        matplotlib.rcParams['text.usetex'] = usetex

        fig = plt.figure()

        if 'm' in objects:
            # use different markers...
            ax1 = fig.add_subplot(1, 6, 1)
            x = range(10)
            ax1.plot(x, [1] * 10, marker=u'D')
            ax1.plot(x, [2] * 10, marker=u'x')
            ax1.plot(x, [3] * 10, marker=u'^')
            ax1.plot(x, [4] * 10, marker=u'H')
            ax1.plot(x, [5] * 10, marker=u'v')

        if 'h' in objects:
            # also use different hatch patterns
            ax2 = fig.add_subplot(1, 6, 2)
            bars = ax2.bar(range(1, 5), range(1, 5)) + \
                ax2.bar(range(1, 5), [6] * 4, bottom=range(1, 5))
            ax2.set_xticks([1.5, 2.5, 3.5, 4.5])

            patterns = ('-', '+', 'x', '\\', '*', 'o', 'O', '.')
            for bar, pattern in zip(bars, patterns):
                bar.set_hatch(pattern)

        if 'i' in objects:
            # also use different images
            A = [[1, 2, 3], [2, 3, 1], [3, 1, 2]]
            fig.add_subplot(1, 6, 3).imshow(A, interpolation='nearest')
            A = [[1, 3, 2], [1, 2, 3], [3, 1, 2]]
            fig.add_subplot(1, 6, 4).imshow(A, interpolation='bilinear')
            A = [[2, 3, 1], [1, 2, 3], [2, 1, 3]]
            fig.add_subplot(1, 6, 5).imshow(A, interpolation='bicubic')

        # This last subplot is drawn whatever ``objects`` contains.
        x = range(5)
        fig.add_subplot(1, 6, 6).plot(x, x)

        # On Python 2 the PS output is collected in a text buffer and then
        # written to sys.stdout; in every other case the figure is saved
        # straight to sys.stdout.buffer when that attribute exists, else to
        # sys.stdout itself.
        if six.PY2 and format == 'ps':
            stdout = io.StringIO()
        else:
            stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        fig.savefig(stdout, format=format)
        if six.PY2 and format == 'ps':
            sys.stdout.write(stdout.getvalue())
    finally:
        # Restores SOURCE_DATE_EPOCH, including when an error occurred
        if sde is None:
            os.environ.pop('SOURCE_DATE_EPOCH', None)
        else:
            os.environ['SOURCE_DATE_EPOCH'] = sde
79+
80+
81+
def _determinism_check(objects='mhi', format="pdf", usetex=False):
    """
    Render the same figure in three separate interpreter processes and check
    that the three outputs are byte-for-byte identical.

    Parameters
    ----------
    objects : str
        contains characters corresponding to objects to be included in the test
        document: 'm' for markers, 'h' for hatch patterns, 'i' for images. The
        default value is "mhi", so that the test includes all these objects.
    format : str
        format string. The default value is "pdf".
    usetex : bool
        forwarded to ``_determinism_save``. When true, a difference between
        outputs raises ``SkipTest`` instead of failing the test.
    """
    from nose.tools import assert_equal

    # Program run by each child interpreter; it writes the figure to its
    # standard output, which check_output captures.
    script = ('import matplotlib; '
              'matplotlib.use(%r); '
              'from matplotlib.testing.determinism '
              'import _determinism_save;'
              '_determinism_save(%r,%r,%r)'
              % (format, objects, format, usetex))
    command = [sys.executable, '-R', '-c', script]

    outputs = [check_output(command) for _ in range(3)]

    reference = outputs[0]
    for candidate in outputs[1:]:
        if not usetex:
            assert_equal(candidate, reference)
        elif candidate != reference:
            raise SkipTest("failed, maybe due to ghostscript timestamps")
112+
113+
114+
def _determinism_source_date_epoch(format, string, keyword=b"CreationDate"):
    """
    Test SOURCE_DATE_EPOCH support. Output a document with the environment
    variable SOURCE_DATE_EPOCH set to 2000-01-01 00:00 UTC and check that the
    document contains the timestamp that corresponds to this date (given as an
    argument).

    Parameters
    ----------
    format : str
        format string, such as "pdf".
    string : bytes
        timestamp string for 2000-01-01 00:00 UTC, as it is expected to
        appear in the document. It is searched for in the raw bytes captured
        from the child process.
    keyword : bytes
        a string to look at when searching for the timestamp in the document
        (used in case the test fails).
    """
    buff = check_output([sys.executable, '-R', '-c',
                         'import matplotlib; '
                         'matplotlib.use(%r); '
                         'from matplotlib.testing.determinism '
                         'import _determinism_save;'
                         '_determinism_save(%r,%r)'
                         % (format, "", format)])
    # The keyword is matched literally: escape it so that any regular
    # expression metacharacters it contains are not interpreted.
    find_keyword = re.compile(b".*" + re.escape(keyword) + b".*")
    key = find_keyword.search(buff)
    # Print the line holding the keyword (or a notice that it is missing)
    # so that a failure of the assertion below is easier to diagnose.
    if key:
        print(key.group())
    else:
        print("Timestamp keyword (%s) not found!" % keyword)
    assert string in buff

lib/matplotlib/tests/test_backend_pdf.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from matplotlib import cm, rcParams
1313
from matplotlib.backends.backend_pdf import PdfPages
1414
from matplotlib import pyplot as plt
15+
from matplotlib.testing.determinism import (_determinism_source_date_epoch,
16+
_determinism_check)
1517
from matplotlib.testing.decorators import (image_comparison, knownfailureif,
1618
cleanup)
1719

@@ -98,8 +100,8 @@ def test_multipage_keep_empty():
98100

99101
@cleanup
100102
def test_composite_image():
101-
#Test that figures can be saved with and without combining multiple images
102-
#(on a single set of axes) into a single composite image.
103+
# Test that figures can be saved with and without combining multiple images
104+
# (on a single set of axes) into a single composite image.
103105
X, Y = np.meshgrid(np.arange(-5, 5, 1), np.arange(-5, 5, 1))
104106
Z = np.sin(Y ** 2)
105107
fig = plt.figure()
@@ -117,6 +119,42 @@ def test_composite_image():
117119
assert len(pdf._file._images.keys()) == 2
118120

119121

122+
@cleanup
123+
def test_source_date_epoch():
124+
"""Test SOURCE_DATE_EPOCH support for PDF output"""
125+
_determinism_source_date_epoch("pdf", b"/CreationDate (D:20000101000000Z)")
126+
127+
128+
@cleanup
129+
def test_determinism_plain():
130+
"""Test for reproducible PDF output: simple figure"""
131+
_determinism_check('', format="pdf")
132+
133+
134+
@cleanup
135+
def test_determinism_images():
136+
"""Test for reproducible PDF output: figure with different images"""
137+
_determinism_check('i', format="pdf")
138+
139+
140+
@cleanup
141+
def test_determinism_hatches():
142+
"""Test for reproducible PDF output: figure with different hatches"""
143+
_determinism_check('h', format="pdf")
144+
145+
146+
@cleanup
147+
def test_determinism_markers():
148+
"""Test for reproducible PDF output: figure with different markers"""
149+
_determinism_check('m', format="pdf")
150+
151+
152+
@cleanup
153+
def test_determinism_all():
154+
"""Test for reproducible PDF output"""
155+
_determinism_check(format="pdf")
156+
157+
120158
@image_comparison(baseline_images=['hatching_legend'],
121159
extensions=['pdf'])
122160
def test_hatching_legend():

0 commit comments

Comments
 (0)