Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 336c1bb

Browse files
committed
Merge pull request #4605 from jkseppan/png-in-pdf
ENH: Use png predictors when compressing images in pdf files
2 parents bc185a2 + 18dcc54 commit 336c1bb

File tree

2 files changed

+141
-74
lines changed

2 files changed

+141
-74
lines changed

lib/matplotlib/backends/backend_pdf.py

Lines changed: 96 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import codecs
1313
import os
1414
import re
15+
import struct
1516
import sys
1617
import time
1718
import warnings
@@ -43,6 +44,7 @@
4344
from matplotlib.transforms import Affine2D, BboxBase
4445
from matplotlib.path import Path
4546
from matplotlib import _path
47+
from matplotlib import _png
4648
from matplotlib import ttconv
4749

4850
# Overview
@@ -87,7 +89,6 @@
8789

8890
# TODOs:
8991
#
90-
# * image compression could be improved (PDF supports png-like compression)
9192
# * encoding of fonts, including mathtext fonts and unicode support
9293
# * TTF support has lots of small TODOs, e.g., how do you know if a font
9394
# is serif/sans-serif, or symbolic/non-symbolic?
@@ -334,11 +335,12 @@ class Stream(object):
334335
"""
335336
__slots__ = ('id', 'len', 'pdfFile', 'file', 'compressobj', 'extra', 'pos')
336337

337-
def __init__(self, id, len, file, extra=None):
338+
def __init__(self, id, len, file, extra=None, png=None):
338339
"""id: object id of stream; len: an unused Reference object for the
339340
length of the stream, or None (to use a memory buffer); file:
340341
a PdfFile; extra: a dictionary of extra key-value pairs to
341-
include in the stream header """
342+
include in the stream header; png: if the data is already
343+
png compressed, the decode parameters"""
342344
self.id = id # object id
343345
self.len = len # id of length object
344346
self.pdfFile = file
@@ -347,10 +349,13 @@ def __init__(self, id, len, file, extra=None):
347349
if extra is None:
348350
self.extra = dict()
349351
else:
350-
self.extra = extra
352+
self.extra = extra.copy()
353+
if png is not None:
354+
self.extra.update({'Filter': Name('FlateDecode'),
355+
'DecodeParms': png})
351356

352357
self.pdfFile.recordXref(self.id)
353-
if rcParams['pdf.compression']:
358+
if rcParams['pdf.compression'] and not png:
354359
self.compressobj = zlib.compressobj(rcParams['pdf.compression'])
355360
if self.len is None:
356361
self.file = BytesIO()
@@ -583,9 +588,9 @@ def output(self, *data):
583588
self.write(fill([pdfRepr(x) for x in data]))
584589
self.write(b'\n')
585590

586-
def beginStream(self, id, len, extra=None):
591+
def beginStream(self, id, len, extra=None, png=None):
587592
assert self.currentstream is None
588-
self.currentstream = Stream(id, len, self, extra)
593+
self.currentstream = Stream(id, len, self, extra, png)
589594

590595
def endStream(self):
591596
if self.currentstream is not None:
@@ -1247,73 +1252,103 @@ def imageObject(self, image):
12471252
self.images[image] = (name, ob)
12481253
return name
12491254

1250-
def _rgb(self, im):
1251-
h, w, s = im.as_rgba_str()
1255+
def _unpack(self, im):
1256+
"""
1257+
Unpack the image object im into height, width, data, alpha,
1258+
where data and alpha are HxWx3 (RGB) or HxWx1 (grayscale or alpha)
1259+
arrays, except alpha is None if the image is fully opaque.
1260+
"""
12521261

1262+
h, w, s = im.as_rgba_str()
12531263
rgba = np.fromstring(s, np.uint8)
12541264
rgba.shape = (h, w, 4)
12551265
rgba = rgba[::-1]
1256-
rgb = rgba[:, :, :3].tostring()
1257-
a = rgba[:, :, 3]
1258-
if np.all(a == 255):
1266+
rgb = rgba[:, :, :3]
1267+
alpha = rgba[:, :, 3][..., None]
1268+
if np.all(alpha == 255):
12591269
alpha = None
12601270
else:
1261-
alpha = a.tostring()
1262-
return h, w, rgb, alpha
1263-
1264-
def _gray(self, im, rc=0.3, gc=0.59, bc=0.11):
1265-
rgbat = im.as_rgba_str()
1266-
rgba = np.fromstring(rgbat[2], np.uint8)
1267-
rgba.shape = (rgbat[0], rgbat[1], 4)
1268-
rgba = rgba[::-1]
1269-
rgba_f = rgba.astype(np.float32)
1270-
r = rgba_f[:, :, 0]
1271-
g = rgba_f[:, :, 1]
1272-
b = rgba_f[:, :, 2]
1273-
a = rgba[:, :, 3]
1274-
if np.all(a == 255):
1275-
alpha = None
1271+
alpha = np.array(alpha, order='C')
1272+
if im.is_grayscale:
1273+
r, g, b = rgb.astype(np.float32).transpose(2, 0, 1)
1274+
gray = (0.3 * r + 0.59 * g + 0.11 * b).astype(np.uint8)[..., None]
1275+
return h, w, gray, alpha
12761276
else:
1277-
alpha = a.tostring()
1278-
gray = (r*rc + g*gc + b*bc).astype(np.uint8).tostring()
1279-
return rgbat[0], rgbat[1], gray, alpha
1277+
rgb = np.array(rgb, order='C')
1278+
return h, w, rgb, alpha
12801279

1281-
def writeImages(self):
1282-
for img, pair in six.iteritems(self.images):
1283-
if img.is_grayscale:
1284-
height, width, data, adata = self._gray(img)
1280+
def _writePng(self, data):
1281+
"""
1282+
Write the image *data* into the pdf file using png
1283+
predictors with Flate compression.
1284+
"""
1285+
1286+
buffer = BytesIO()
1287+
_png.write_png(data, buffer)
1288+
buffer.seek(8)
1289+
written = 0
1290+
header = bytearray(8)
1291+
while True:
1292+
n = buffer.readinto(header)
1293+
assert n == 8
1294+
length, type = struct.unpack(b'!L4s', bytes(header))
1295+
if type == b'IDAT':
1296+
data = bytearray(length)
1297+
n = buffer.readinto(data)
1298+
assert n == length
1299+
self.currentstream.write(bytes(data))
1300+
written += n
1301+
elif type == b'IEND':
1302+
break
12851303
else:
1286-
height, width, data, adata = self._rgb(img)
1304+
buffer.seek(length, 1)
1305+
buffer.seek(4, 1) # skip CRC
1306+
1307+
def _writeImg(self, data, height, width, grayscale, id, smask=None):
1308+
"""
1309+
Write the image *data* of size *height* x *width*, as grayscale
1310+
if *grayscale* is true and RGB otherwise, as pdf object *id*
1311+
and with the soft mask (alpha channel) *smask*, which should be
1312+
either None or a *height* x *width* x 1 array.
1313+
"""
12871314

1288-
colorspace = 'DeviceGray' if img.is_grayscale else 'DeviceRGB'
1289-
obj = {'Type': Name('XObject'),
1290-
'Subtype': Name('Image'),
1291-
'Width': width,
1292-
'Height': height,
1293-
'ColorSpace': Name(colorspace),
1294-
'BitsPerComponent': 8}
1315+
obj = {'Type': Name('XObject'),
1316+
'Subtype': Name('Image'),
1317+
'Width': width,
1318+
'Height': height,
1319+
'ColorSpace': Name('DeviceGray' if grayscale
1320+
else 'DeviceRGB'),
1321+
'BitsPerComponent': 8}
1322+
if smask:
1323+
obj['SMask'] = smask
1324+
if rcParams['pdf.compression']:
1325+
png = {'Predictor': 10,
1326+
'Colors': 1 if grayscale else 3,
1327+
'Columns': width}
1328+
else:
1329+
png = None
1330+
self.beginStream(
1331+
id,
1332+
self.reserveObject('length of image stream'),
1333+
obj,
1334+
png=png
1335+
)
1336+
if png:
1337+
self._writePng(data)
1338+
else:
1339+
self.currentstream.write(data.tostring())
1340+
self.endStream()
12951341

1342+
def writeImages(self):
1343+
for img, pair in six.iteritems(self.images):
1344+
height, width, data, adata = self._unpack(img)
12961345
if adata is not None:
12971346
smaskObject = self.reserveObject("smask")
1298-
self.beginStream(
1299-
smaskObject.id,
1300-
self.reserveObject('length of smask stream'),
1301-
{'Type': Name('XObject'), 'Subtype': Name('Image'),
1302-
'Width': width, 'Height': height,
1303-
'ColorSpace': Name('DeviceGray'), 'BitsPerComponent': 8})
1304-
# TODO: predictors (i.e., output png)
1305-
self.currentstream.write(adata)
1306-
self.endStream()
1307-
obj['SMask'] = smaskObject
1308-
1309-
self.beginStream(
1310-
pair[1].id,
1311-
self.reserveObject('length of image stream'),
1312-
obj
1313-
)
1314-
# TODO: predictors (i.e., output png)
1315-
self.currentstream.write(data)
1316-
self.endStream()
1347+
self._writeImg(adata, height, width, True, smaskObject.id)
1348+
else:
1349+
smaskObject = None
1350+
self._writeImg(data, height, width, img.is_grayscale,
1351+
pair[1].id, smaskObject)
13171352

13181353
def markerObject(self, path, trans, fill, stroke, lw, joinstyle,
13191354
capstyle):

src/_png.cpp

Lines changed: 45 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -71,11 +71,15 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
7171
double dpi = 0;
7272
const char *names[] = { "buffer", "file", "dpi", NULL };
7373

74+
// We don't need strict contiguity, just for each row to be
75+
// contiguous, and libpng has special handling for getting RGB out
76+
// of RGBA, ARGB or BGR. But the simplest thing to do is to
77+
// enforce contiguity using array_view::converter_contiguous.
7478
if (!PyArg_ParseTupleAndKeywords(args,
7579
kwds,
7680
"O&O|d:write_png",
7781
(char **)names,
78-
&buffer.converter,
82+
&buffer.converter_contiguous,
7983
&buffer,
8084
&filein,
8185
&dpi)) {
@@ -84,6 +88,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
8488

8589
png_uint_32 width = (png_uint_32)buffer.dim(1);
8690
png_uint_32 height = (png_uint_32)buffer.dim(0);
91+
int channels = buffer.dim(2);
8792
std::vector<png_bytep> row_pointers(height);
8893
for (png_uint_32 row = 0; row < (png_uint_32)height; ++row) {
8994
row_pointers[row] = (png_bytep)buffer[row].data();
@@ -98,9 +103,22 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
98103
png_structp png_ptr = NULL;
99104
png_infop info_ptr = NULL;
100105
struct png_color_8_struct sig_bit;
101-
102-
if (buffer.dim(2) != 4) {
103-
PyErr_SetString(PyExc_ValueError, "Buffer must be RGBA NxMx4 array");
106+
int png_color_type;
107+
108+
switch (channels) {
109+
case 1:
110+
png_color_type = PNG_COLOR_TYPE_GRAY;
111+
break;
112+
case 3:
113+
png_color_type = PNG_COLOR_TYPE_RGB;
114+
break;
115+
case 4:
116+
png_color_type = PNG_COLOR_TYPE_RGB_ALPHA;
117+
break;
118+
default:
119+
PyErr_SetString(PyExc_ValueError,
120+
"Buffer must be an NxMxD array with D in 1, 3, 4 "
121+
"(grayscale, RGB, RGBA)");
104122
goto exit;
105123
}
106124

@@ -141,7 +159,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
141159
}
142160

143161
if (setjmp(png_jmpbuf(png_ptr))) {
144-
PyErr_SetString(PyExc_RuntimeError, "Error setting jumps");
162+
PyErr_SetString(PyExc_RuntimeError, "libpng signaled error");
145163
goto exit;
146164
}
147165

@@ -155,7 +173,7 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
155173
width,
156174
height,
157175
8,
158-
PNG_COLOR_TYPE_RGB_ALPHA,
176+
png_color_type,
159177
PNG_INTERLACE_NONE,
160178
PNG_COMPRESSION_TYPE_BASE,
161179
PNG_FILTER_TYPE_BASE);
@@ -166,13 +184,27 @@ static PyObject *Py_write_png(PyObject *self, PyObject *args, PyObject *kwds)
166184
png_set_pHYs(png_ptr, info_ptr, dots_per_meter, dots_per_meter, PNG_RESOLUTION_METER);
167185
}
168186

169-
// this a a color image!
170-
sig_bit.gray = 0;
171-
sig_bit.red = 8;
172-
sig_bit.green = 8;
173-
sig_bit.blue = 8;
174-
/* if the image has an alpha channel then */
175-
sig_bit.alpha = 8;
187+
sig_bit.alpha = 0;
188+
switch (png_color_type) {
189+
case PNG_COLOR_TYPE_GRAY:
190+
sig_bit.gray = 8;
191+
sig_bit.red = 0;
192+
sig_bit.green = 0;
193+
sig_bit.blue = 0;
194+
break;
195+
case PNG_COLOR_TYPE_RGB_ALPHA:
196+
sig_bit.alpha = 8;
197+
// fall through
198+
case PNG_COLOR_TYPE_RGB:
199+
sig_bit.gray = 0;
200+
sig_bit.red = 8;
201+
sig_bit.green = 8;
202+
sig_bit.blue = 8;
203+
break;
204+
default:
205+
PyErr_SetString(PyExc_RuntimeError, "internal error, bad png_color_type");
206+
goto exit;
207+
}
176208
png_set_sBIT(png_ptr, info_ptr, &sig_bit);
177209

178210
png_write_info(png_ptr, info_ptr);

0 commit comments

Comments
 (0)