Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Reproducible PS/PDF output (master) #6597

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from Dec 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
c6a4660
To allow reproducible output:
Jun 16, 2016
4e477dc
Reproducible PDF output: sort TTF characters
Jun 18, 2016
946ae45
Reproducible PDF output: sort fonts
Jun 18, 2016
22f71a2
Tests for determinist PDF output:
Jun 18, 2016
ad660d7
Share determinism test code for PS and PDF output.
Jun 27, 2016
37c28b4
Reproducible PS/tex output.
Jun 27, 2016
2e1c773
Removes test_source_date_epoch_tex test, since this is ghostscript de…
Jun 27, 2016
541f97e
PEP8
Jun 27, 2016
2a6ebc8
Add what's new section
Jul 7, 2016
6ee8967
Add some insight when test_source_date_epoch fails.
Jul 8, 2016
a3185e6
Allow parallel execution of test_backend_ps:test_determinism_all_tex …
Jul 8, 2016
8f095f6
Use subprocess for _test_source_date_epoch, to allow parallel calls (…
Jul 8, 2016
c007f49
Change SOURCE_DATE_EPOCH test date, to use two-digits numbers for mon…
Jul 11, 2016
ecbdd55
PEP8
Jul 11, 2016
65ec88e
Warnings about possible unreproducibility issues
Jul 12, 2016
5b405cc
Doc rephrasing, thanks to jkseppan.
Jul 12, 2016
d10a21e
Use explicit date formatting for PS backend timestamp, instead of asc…
Jul 12, 2016
da55bb6
Revert to 2000-01-01 for the SOURCE_DATE_EPOCH test date.
Jul 12, 2016
c56dae7
Use standard date format for PS timestamp
Jul 13, 2016
995173d
Rename functions in determinism.py to remove `test' keyword, since th…
Oct 4, 2016
eef6b12
TST: Use standard I/O for determinism tests.
QuLogic Oct 7, 2016
fb529da
TST: Remove multiple nested imports.
QuLogic Oct 7, 2016
ebff832
TST: Fix compatibility with Python 2.
QuLogic Oct 8, 2016
f6301c2
Merge pull request #1 from QuLogic/reproducible-master
Oct 8, 2016
1786555
Adds __future__ imports to testing/determinism.py
Oct 9, 2016
af4213e
Pass usetex setting to _determinism_save
Oct 11, 2016
bf7387e
Skip test using ghostscript, since failing may be due to ghostscript …
Oct 11, 2016
2cdc577
Forgot to change one timestamp format in c56dae7c52af50ceaca33ba14717…
Oct 11, 2016
76bec02
Reuse UTC timezone from dates.py
Nov 2, 2016
bbab0c5
Removes now useless option uid for _determinism_check
Nov 3, 2016
1a5ada6
Typo
Nov 3, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions doc/users/whats_new/reproducible_ps_pdf.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Reproducible PS and PDF output
------------------------------

The ``SOURCE_DATE_EPOCH`` environment variable can now be used to set
the timestamp value in the PS and PDF outputs. See
https://reproducible-builds.org/specs/source-date-epoch/

So far, the reproducibility of the output from the PS and PDF backends
has been tested using various plot elements, but only with the default
values of options such as ``{ps,pdf}.fonttype`` that can affect the
output at a low level, and not with the mathtext or usetex features. When
matplotlib calls external tools (such as PS distillers or LaTeX) their
versions need to be kept constant for reproducibility, and they may
add sources of nondeterminism outside the control of matplotlib.
38 changes: 28 additions & 10 deletions lib/matplotlib/backends/backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
import time
import warnings
import zlib
import collections
from io import BytesIO
from functools import total_ordering

import numpy as np
from six import unichr


from datetime import datetime
from datetime import datetime, tzinfo, timedelta
from math import ceil, cos, floor, pi, sin

import matplotlib
Expand All @@ -45,6 +46,7 @@
from matplotlib.mathtext import MathTextParser
from matplotlib.transforms import Affine2D, BboxBase
from matplotlib.path import Path
from matplotlib.dates import UTC
from matplotlib import _path
from matplotlib import _png
from matplotlib import ttconv
Expand Down Expand Up @@ -202,10 +204,14 @@ def pdfRepr(obj):
# A date.
elif isinstance(obj, datetime):
r = obj.strftime('D:%Y%m%d%H%M%S')
if time.daylight:
z = time.altzone
z = obj.utcoffset()
if z is not None:
z = z.seconds
else:
z = time.timezone
if time.daylight:
z = time.altzone
else:
z = time.timezone
if z == 0:
r += 'Z'
elif z < 0:
Expand Down Expand Up @@ -468,10 +474,19 @@ def __init__(self, filename):
self.writeObject(self.rootObject, root)

revision = ''
# get source date from SOURCE_DATE_EPOCH, if set
# See https://reproducible-builds.org/specs/source-date-epoch/
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
if source_date_epoch:
source_date = datetime.utcfromtimestamp(int(source_date_epoch))
source_date = source_date.replace(tzinfo=UTC)
else:
source_date = datetime.today()

self.infoDict = {
'Creator': 'matplotlib %s, http://matplotlib.org' % __version__,
'Producer': 'matplotlib pdf backend%s' % revision,
'CreationDate': datetime.today()
'CreationDate': source_date
}

self.fontNames = {} # maps filenames to internal font names
Expand All @@ -483,14 +498,15 @@ def __init__(self, filename):

self.alphaStates = {} # maps alpha values to graphics state objects
self.nextAlphaState = 1
self.hatchPatterns = {}
# reproducible writeHatches needs an ordered dict:
self.hatchPatterns = collections.OrderedDict()
self.nextHatch = 1
self.gouraudTriangles = []

self._images = {}
self._images = collections.OrderedDict() # reproducible writeImages
self.nextImage = 1

self.markers = {}
self.markers = collections.OrderedDict() # reproducible writeMarkers
self.multi_byte_charprocs = {}

self.paths = []
Expand Down Expand Up @@ -640,7 +656,8 @@ def fontName(self, fontprop):

def writeFonts(self):
fonts = {}
for filename, Fx in six.iteritems(self.fontNames):
for filename in sorted(self.fontNames):
Fx = self.fontNames[filename]
matplotlib.verbose.report('Embedding font %s' % filename, 'debug')
if filename.endswith('.afm'):
# from pdf.use14corefonts
Expand Down Expand Up @@ -920,7 +937,8 @@ def get_char_width(charcode):
rawcharprocs = ttconv.get_pdf_charprocs(
filename.encode(sys.getfilesystemencoding()), glyph_ids)
charprocs = {}
for charname, stream in six.iteritems(rawcharprocs):
for charname in sorted(rawcharprocs):
stream = rawcharprocs[charname]
charprocDict = {'Length': len(stream)}
# The 2-byte characters are used as XObjects, so they
# need extra info in their dictionary
Expand Down
22 changes: 19 additions & 3 deletions lib/matplotlib/backends/backend_ps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import six
from six.moves import StringIO

import glob, math, os, shutil, sys, time
import glob, math, os, shutil, sys, time, datetime
def _fn_name(): return sys._getframe(1).f_code.co_name
import io

Expand Down Expand Up @@ -1087,7 +1087,15 @@ def print_figure_impl():
if title: print("%%Title: "+title, file=fh)
print(("%%Creator: matplotlib version "
+__version__+", http://matplotlib.org/"), file=fh)
print("%%CreationDate: "+time.ctime(time.time()), file=fh)
# get source date from SOURCE_DATE_EPOCH, if set
# See https://reproducible-builds.org/specs/source-date-epoch/
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
if source_date_epoch:
source_date = datetime.datetime.utcfromtimestamp(
int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y")
else:
source_date = time.ctime()
print("%%CreationDate: "+source_date, file=fh)
print("%%Orientation: " + orientation, file=fh)
if not isEPSF: print("%%DocumentPaperSizes: "+papertype, file=fh)
print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh)
Expand Down Expand Up @@ -1270,7 +1278,15 @@ def write(self, *kl, **kwargs):
if title: print("%%Title: "+title, file=fh)
print(("%%Creator: matplotlib version "
+__version__+", http://matplotlib.org/"), file=fh)
print("%%CreationDate: "+time.ctime(time.time()), file=fh)
# get source date from SOURCE_DATE_EPOCH, if set
# See https://reproducible-builds.org/specs/source-date-epoch/
source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
if source_date_epoch:
source_date = datetime.datetime.utcfromtimestamp(
int(source_date_epoch) ).strftime("%a %b %d %H:%M:%S %Y")
else:
source_date = time.ctime()
print("%%CreationDate: "+source_date, file=fh)
print("%%%%BoundingBox: %d %d %d %d" % bbox, file=fh)
print("%%EndComments", file=fh)

Expand Down
144 changes: 144 additions & 0 deletions lib/matplotlib/testing/determinism.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""
Provides utilities to test output reproducibility.
"""

from __future__ import (absolute_import, division, print_function,
unicode_literals)

import six

import io
import os
import re
import sys
from subprocess import check_output

import matplotlib
from matplotlib import pyplot as plt

from nose.plugins.skip import SkipTest


def _determinism_save(objects='mhi', format="pdf", usetex=False):
    """
    Draw a test figure and write it to standard output.

    ``SOURCE_DATE_EPOCH`` is pinned to a constant value while the figure is
    drawn and saved, and its previous state is restored afterwards, even if
    drawing or saving raises.

    Parameters
    ----------
    objects : str
        contains characters corresponding to objects to be included in the
        figure: 'm' for markers, 'h' for hatch patterns, 'i' for images.
        A simple line plot is always drawn, whatever the value.
    format : str
        format string passed to ``savefig``. The default value is "pdf".
    usetex : bool
        value assigned to the ``text.usetex`` rcParam before drawing.
    """
    # save current value of SOURCE_DATE_EPOCH and set it
    # to a constant value, so that time difference is not
    # taken into account
    sde = os.environ.pop('SOURCE_DATE_EPOCH', None)
    os.environ['SOURCE_DATE_EPOCH'] = "946684800"

    try:
        matplotlib.rcParams['text.usetex'] = usetex

        fig = plt.figure()

        if 'm' in objects:
            # use different markers...
            ax1 = fig.add_subplot(1, 6, 1)
            x = range(10)
            ax1.plot(x, [1] * 10, marker=u'D')
            ax1.plot(x, [2] * 10, marker=u'x')
            ax1.plot(x, [3] * 10, marker=u'^')
            ax1.plot(x, [4] * 10, marker=u'H')
            ax1.plot(x, [5] * 10, marker=u'v')

        if 'h' in objects:
            # also use different hatch patterns
            ax2 = fig.add_subplot(1, 6, 2)
            bars = ax2.bar(range(1, 5), range(1, 5)) + \
                ax2.bar(range(1, 5), [6] * 4, bottom=range(1, 5))
            ax2.set_xticks([1.5, 2.5, 3.5, 4.5])

            patterns = ('-', '+', 'x', '\\', '*', 'o', 'O', '.')
            for bar, pattern in zip(bars, patterns):
                bar.set_hatch(pattern)

        if 'i' in objects:
            # also use different images
            A = [[1, 2, 3], [2, 3, 1], [3, 1, 2]]
            fig.add_subplot(1, 6, 3).imshow(A, interpolation='nearest')
            A = [[1, 3, 2], [1, 2, 3], [3, 1, 2]]
            fig.add_subplot(1, 6, 4).imshow(A, interpolation='bilinear')
            A = [[2, 3, 1], [1, 2, 3], [2, 1, 3]]
            fig.add_subplot(1, 6, 5).imshow(A, interpolation='bicubic')

        x = range(5)
        fig.add_subplot(1, 6, 6).plot(x, x)

        # Simply passing getattr(sys.stdout, 'buffer', sys.stdout) to savefig
        # does not work with the PS backend, which tries to wrap the stream it
        # is given in a TextIOWrapper.  For PS output on Python 2 the document
        # is therefore collected in a StringIO and copied to stdout afterwards.
        if six.PY2 and format == 'ps':
            stdout = io.StringIO()
        else:
            stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        fig.savefig(stdout, format=format)
        if six.PY2 and format == 'ps':
            sys.stdout.write(stdout.getvalue())
    finally:
        # Restores SOURCE_DATE_EPOCH, also when drawing or saving failed, so
        # that the pinned value never leaks into the caller's environment.
        if sde is None:
            os.environ.pop('SOURCE_DATE_EPOCH', None)
        else:
            os.environ['SOURCE_DATE_EPOCH'] = sde


def _determinism_check(objects='mhi', format="pdf", usetex=False):
    """
    Render the same figure in three separate interpreter runs and verify
    that every run wrote exactly the same output.

    Parameters
    ----------
    objects : str
        contains characters corresponding to objects to be included in the test
        document: 'm' for markers, 'h' for hatch patterns, 'i' for images. The
        default value is "mhi", so that the test includes all these objects.
    format : str
        format string. The default value is "pdf".
    usetex : bool
        forwarded to ``_determinism_save``. When true, differing outputs
        raise ``SkipTest`` instead of failing the comparison.
    """
    from nose.tools import assert_equal

    # The command is identical for every run, so build it once.
    command = [sys.executable, '-R', '-c',
               'import matplotlib; '
               'matplotlib.use(%r); '
               'from matplotlib.testing.determinism '
               'import _determinism_save;'
               '_determinism_save(%r,%r,%r)'
               % (format, objects, format, usetex)]
    outputs = [check_output(command) for _ in range(3)]

    reference = outputs[0]
    for candidate in outputs[1:]:
        if not usetex:
            assert_equal(candidate, reference)
        elif candidate != reference:
            raise SkipTest("failed, maybe due to ghostscript timestamps")


def _determinism_source_date_epoch(format, string, keyword=b"CreationDate"):
    """
    Test SOURCE_DATE_EPOCH support. Output a document with the environment
    variable SOURCE_DATE_EPOCH set to 2000-01-01 00:00 UTC and check that the
    document contains the timestamp that corresponds to this date (given as an
    argument).

    Parameters
    ----------
    format : str
        format string, such as "pdf".
    string : bytes
        timestamp string for 2000-01-01 00:00 UTC, as it is expected to
        appear in the document.
    keyword : bytes
        a literal string to look at when searching for the timestamp in the
        document (used in case the test fails).

    Raises
    ------
    AssertionError
        if `string` is not found in the generated document.
    """
    buff = check_output([sys.executable, '-R', '-c',
                         'import matplotlib; '
                         'matplotlib.use(%r); '
                         'from matplotlib.testing.determinism '
                         'import _determinism_save;'
                         '_determinism_save(%r,%r)'
                         % (format, "", format)])
    # The keyword is literal text, so escape it: regex metacharacters in it
    # must not change what is searched for.
    find_keyword = re.compile(b".*" + re.escape(keyword) + b".*")
    key = find_keyword.search(buff)
    # Print the line holding the keyword (or its absence) to give some
    # insight when the assertion below fails.
    if key:
        print(key.group())
    else:
        print("Timestamp keyword (%s) not found!" % keyword)
    assert string in buff
42 changes: 40 additions & 2 deletions lib/matplotlib/tests/test_backend_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from matplotlib import cm, rcParams
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import pyplot as plt
from matplotlib.testing.determinism import (_determinism_source_date_epoch,
_determinism_check)
from matplotlib.testing.decorators import (image_comparison, knownfailureif,
cleanup)

Expand Down Expand Up @@ -90,8 +92,8 @@ def test_multipage_keep_empty():

@cleanup
def test_composite_image():
#Test that figures can be saved with and without combining multiple images
#(on a single set of axes) into a single composite image.
# Test that figures can be saved with and without combining multiple images
# (on a single set of axes) into a single composite image.
X, Y = np.meshgrid(np.arange(-5, 5, 1), np.arange(-5, 5, 1))
Z = np.sin(Y ** 2)
fig = plt.figure()
Expand All @@ -109,6 +111,42 @@ def test_composite_image():
assert len(pdf._file._images.keys()) == 2


@cleanup
def test_source_date_epoch():
    """PDF output honours the SOURCE_DATE_EPOCH environment variable"""
    expected = b"/CreationDate (D:20000101000000Z)"
    _determinism_source_date_epoch(format="pdf", string=expected)


@cleanup
def test_determinism_plain():
    """Reproducible PDF output for a simple figure with no extra objects"""
    _determinism_check(objects='', format="pdf")


@cleanup
def test_determinism_images():
    """Reproducible PDF output for a figure containing different images"""
    _determinism_check(objects='i', format="pdf")


@cleanup
def test_determinism_hatches():
    """Reproducible PDF output for a figure containing different hatches"""
    _determinism_check(objects='h', format="pdf")


@cleanup
def test_determinism_markers():
    """Reproducible PDF output for a figure containing different markers"""
    _determinism_check(objects='m', format="pdf")


@cleanup
def test_determinism_all():
    """Reproducible PDF output with the default set of test objects"""
    _determinism_check(format='pdf')


@image_comparison(baseline_images=['hatching_legend'],
extensions=['pdf'])
def test_hatching_legend():
Expand Down
Loading