Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 60161fa

Browse files
committed
Compress raster images in PDF output using libpng
The PDF format allows for PNG predictors in Flate-compressed streams, so we can use libpng to encode a PNG file and extract the row data from that.
1 parent 9995cd5 commit 60161fa

1 file changed

Lines changed: 73 additions & 44 deletions

File tree

lib/matplotlib/backends/backend_pdf.py

Lines changed: 73 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import codecs
1313
import os
1414
import re
15+
import struct
1516
import sys
1617
import time
1718
import warnings
@@ -43,6 +44,7 @@
4344
from matplotlib.transforms import Affine2D, BboxBase
4445
from matplotlib.path import Path
4546
from matplotlib import _path
47+
from matplotlib import _png
4648
from matplotlib import ttconv
4749

4850
# Overview
@@ -87,7 +89,6 @@
8789

8890
# TODOs:
8991
#
90-
# * image compression could be improved (PDF supports png-like compression)
9192
# * encoding of fonts, including mathtext fonts and unicode support
9293
# * TTF support has lots of small TODOs, e.g., how do you know if a font
9394
# is serif/sans-serif, or symbolic/non-symbolic?
@@ -341,11 +342,12 @@ class Stream(object):
341342
"""
342343
__slots__ = ('id', 'len', 'pdfFile', 'file', 'compressobj', 'extra', 'pos')
343344

344-
def __init__(self, id, len, file, extra=None):
345+
def __init__(self, id, len, file, extra=None, png=None):
345346
"""id: object id of stream; len: an unused Reference object for the
346347
length of the stream, or None (to use a memory buffer); file:
347348
a PdfFile; extra: a dictionary of extra key-value pairs to
348-
include in the stream header """
349+
include in the stream header; png: if the data is already
350+
png compressed, the decode parameters"""
349351
self.id = id # object id
350352
self.len = len # id of length object
351353
self.pdfFile = file
@@ -354,10 +356,13 @@ def __init__(self, id, len, file, extra=None):
354356
if extra is None:
355357
self.extra = dict()
356358
else:
357-
self.extra = extra
359+
self.extra = extra.copy()
360+
if png is not None:
361+
self.extra.update({'Filter': Name('FlateDecode'),
362+
'DecodeParms': png})
358363

359364
self.pdfFile.recordXref(self.id)
360-
if rcParams['pdf.compression']:
365+
if rcParams['pdf.compression'] and not png:
361366
self.compressobj = zlib.compressobj(rcParams['pdf.compression'])
362367
if self.len is None:
363368
self.file = BytesIO()
@@ -590,9 +595,9 @@ def output(self, *data):
590595
self.write(fill([pdfRepr(x) for x in data]))
591596
self.write(b'\n')
592597

593-
def beginStream(self, id, len, extra=None):
598+
def beginStream(self, id, len, extra=None, png=None):
594599
assert self.currentstream is None
595-
self.currentstream = Stream(id, len, self, extra)
600+
self.currentstream = Stream(id, len, self, extra, png)
596601

597602
def endStream(self):
598603
if self.currentstream is not None:
@@ -1262,12 +1267,10 @@ def _rgb(self, im):
12621267
rgba = np.fromstring(s, np.uint8)
12631268
rgba.shape = (h, w, 4)
12641269
rgba = rgba[::-1]
1265-
rgb = rgba[:, :, :3].tostring()
1266-
a = rgba[:, :, 3]
1267-
if np.all(a == 255):
1270+
rgb = np.ascontiguousarray(rgba[:, :, :3])
1271+
alpha = np.ascontiguousarray(rgba[:, :, 3][..., None])
1272+
if np.all(alpha == 255):
12681273
alpha = None
1269-
else:
1270-
alpha = a.tostring()
12711274
return h, w, rgb, alpha
12721275

12731276
def _gray(self, im, rc=0.3, gc=0.59, bc=0.11):
@@ -1279,50 +1282,76 @@ def _gray(self, im, rc=0.3, gc=0.59, bc=0.11):
12791282
r = rgba_f[:, :, 0]
12801283
g = rgba_f[:, :, 1]
12811284
b = rgba_f[:, :, 2]
1282-
a = rgba[:, :, 3]
1283-
if np.all(a == 255):
1285+
alpha = np.ascontiguousarray(rgba[:, :, 3][..., None])
1286+
if np.all(alpha == 255):
12841287
alpha = None
1285-
else:
1286-
alpha = a.tostring()
1287-
gray = (r*rc + g*gc + b*bc).astype(np.uint8).tostring()
1288+
gray = (r*rc + g*gc + b*bc).astype(np.uint8)[..., None]
12881289
return rgbat[0], rgbat[1], gray, alpha
12891290

1291+
def _writePng(self, data):
    """
    Write the image *data* into the current stream as PNG-encoded rows.

    *data* is encoded to an in-memory PNG file with ``_png.write_png``;
    the payload of every IDAT chunk is then copied verbatim into
    ``self.currentstream``.  All other chunks are skipped, and copying
    stops at the IEND chunk.

    Raises ValueError if the encoded buffer ends before an IEND chunk
    is found (a chunk header or an IDAT payload is cut short).
    """
    buffer = BytesIO()
    _png.write_png(data, buffer)
    buffer.seek(8)  # skip the 8-byte PNG file signature
    while True:
        # Each chunk is laid out as: 4-byte big-endian payload length,
        # 4-byte chunk type, payload, 4-byte CRC.
        header = buffer.read(8)
        if len(header) != 8:
            # An explicit check (rather than assert) so that a short read
            # cannot turn into an endless loop when running with -O.
            raise ValueError('truncated PNG data: incomplete chunk header')
        length, chunk_type = struct.unpack('!L4s', header)
        if chunk_type == b'IDAT':
            payload = buffer.read(length)
            if len(payload) != length:
                raise ValueError('truncated PNG data: incomplete IDAT chunk')
            self.currentstream.write(payload)
        elif chunk_type == b'IEND':
            break
        else:
            buffer.seek(length, 1)  # not image data: skip the payload
        buffer.seek(4, 1)  # skip CRC
1312+
1313+
def _writeImg(self, data, height, width, grayscale, id, smask=None):
    """
    Write the image *data* of size *height* x *width* as PDF object *id*.

    The image XObject is declared with 8 bits per component, in the
    DeviceGray colorspace if *grayscale* is true and DeviceRGB otherwise.
    If *smask* is given, it is stored as the image's soft mask.

    When ``rcParams['pdf.compression']`` is set, the stream is opened
    with PNG decode parameters and the data is written through
    ``self._writePng``; otherwise the raw bytes of *data* are written.
    *data* must provide ``tostring()`` (presumably a numpy uint8 array
    -- confirm against the callers).
    """
    obj = {'Type': Name('XObject'),
           'Subtype': Name('Image'),
           'Width': width,
           'Height': height,
           'ColorSpace': Name('DeviceGray' if grayscale
                              else 'DeviceRGB'),
           'BitsPerComponent': 8}
    if smask is not None:
        # PDF dictionary keys are case-sensitive: the soft-mask entry of
        # an image dictionary is 'SMask'.  A misspelt 'Smask' key is
        # ignored by viewers, which drops the image's transparency.
        obj['SMask'] = smask
    if rcParams['pdf.compression']:
        # Predictor 10 and above selects the PNG predictors; Colors and
        # Columns describe the row layout the decoder has to undo.
        png = {'Predictor': 10,
               'Colors': 1 if grayscale else 3,
               'Columns': width}
    else:
        png = None
    self.beginStream(
        id,
        self.reserveObject('length of image stream'),
        obj,
        png=png
        )
    if png is not None:
        self._writePng(data)
    else:
        self.currentstream.write(data.tostring())
    self.endStream()
1340+
12901341
def writeImages(self):
12911342
for img, pair in six.iteritems(self.images):
12921343
if img.is_grayscale:
12931344
height, width, data, adata = self._gray(img)
12941345
else:
12951346
height, width, data, adata = self._rgb(img)
12961347

1297-
colorspace = 'DeviceGray' if img.is_grayscale else 'DeviceRGB'
1298-
obj = {'Type': Name('XObject'),
1299-
'Subtype': Name('Image'),
1300-
'Width': width,
1301-
'Height': height,
1302-
'ColorSpace': Name(colorspace),
1303-
'BitsPerComponent': 8}
1304-
13051348
if adata is not None:
13061349
smaskObject = self.reserveObject("smask")
1307-
self.beginStream(
1308-
smaskObject.id,
1309-
self.reserveObject('length of smask stream'),
1310-
{'Type': Name('XObject'), 'Subtype': Name('Image'),
1311-
'Width': width, 'Height': height,
1312-
'ColorSpace': Name('DeviceGray'), 'BitsPerComponent': 8})
1313-
# TODO: predictors (i.e., output png)
1314-
self.currentstream.write(adata)
1315-
self.endStream()
1316-
obj['SMask'] = smaskObject
1317-
1318-
self.beginStream(
1319-
pair[1].id,
1320-
self.reserveObject('length of image stream'),
1321-
obj
1322-
)
1323-
# TODO: predictors (i.e., output png)
1324-
self.currentstream.write(data)
1325-
self.endStream()
1350+
self._writeImg(adata, height, width, True, smaskObject.id)
1351+
else:
1352+
smaskObject = None
1353+
self._writeImg(data, height, width, img.is_grayscale,
1354+
pair[1].id, smaskObject)
13261355

13271356
def markerObject(self, path, trans, fillp, strokep, lw, joinstyle,
13281357
capstyle):

0 commit comments

Comments
 (0)