Merge pull request #17895 from mpetroff/pdf-indexed-color

jklymak · web-flow · commit be2e5d785a05 · 2020-07-14T09:14:06.000-07:00
Use indexed color for PNG images in PDF files when possible
diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py
@@ -1521,28 +1521,34 @@ def _unpack(self, im):
                 alpha = None
             return rgb, alpha
 
-    def _writePng(self, data):
+    def _writePng(self, img):
         """
-        Write the image *data* into the pdf file using png
+        Write the image *img* into the pdf file using png
         predictors with Flate compression.
         """
         buffer = BytesIO()
-        if data.shape[-1] == 1:
-            data = data.squeeze(axis=-1)
-        Image.fromarray(data).save(buffer, format="png")
+        img.save(buffer, format="png")
         buffer.seek(8)
+        png_data = b''
+        bit_depth = palette = None
         while True:
             length, type = struct.unpack(b'!L4s', buffer.read(8))
-            if type == b'IDAT':
+            if type in [b'IHDR', b'PLTE', b'IDAT']:
                 data = buffer.read(length)
                 if len(data) != length:
                     raise RuntimeError("truncated data")
-                self.currentstream.write(data)
+                if type == b'IHDR':
+                    bit_depth = int(data[8])
+                elif type == b'PLTE':
+                    palette = data
+                elif type == b'IDAT':
+                    png_data += data
             elif type == b'IEND':
                 break
             else:
                 buffer.seek(length, 1)
             buffer.seek(4, 1)   # skip CRC
+        return png_data, bit_depth, palette
 
     def _writeImg(self, data, id, smask=None):
         """
@@ -1551,17 +1557,40 @@ def _writeImg(self, data, id, smask=None):
         (alpha channel) *smask*, which should be either None or a ``(height,
         width, 1)`` array.
         """
-        height, width, colors = data.shape
+        height, width, color_channels = data.shape
         obj = {'Type': Name('XObject'),
                'Subtype': Name('Image'),
                'Width': width,
                'Height': height,
-               'ColorSpace': Name({1: 'DeviceGray', 3: 'DeviceRGB'}[colors]),
+               'ColorSpace': Name({1: 'DeviceGray',
+                                   3: 'DeviceRGB'}[color_channels]),
                'BitsPerComponent': 8}
         if smask:
             obj['SMask'] = smask
         if mpl.rcParams['pdf.compression']:
-            png = {'Predictor': 10, 'Colors': colors, 'Columns': width}
+            if data.shape[-1] == 1:
+                data = data.squeeze(axis=-1)
+            img = Image.fromarray(data)
+            img_colors = img.getcolors(maxcolors=256)
+            if color_channels == 3 and img_colors is not None:
+                # Convert to indexed color if there are 256 colors or fewer
+                # This can significantly reduce the file size
+                num_colors = len(img_colors)
+                img = img.convert(mode='P', dither=Image.NONE,
+                                  palette=Image.ADAPTIVE, colors=num_colors)
+                png_data, bit_depth, palette = self._writePng(img)
+                if bit_depth is None or palette is None:
+                    raise RuntimeError("invalid PNG header")
+                palette = palette[:num_colors * 3]  # Trim padding
+                palette = pdfRepr(palette)
+                obj['ColorSpace'] = Verbatim(b'[/Indexed /DeviceRGB '
+                                             + str(num_colors - 1).encode()
+                                             + b' ' + palette + b']')
+                obj['BitsPerComponent'] = bit_depth
+                color_channels = 1
+            else:
+                png_data, _, _ = self._writePng(img)
+            png = {'Predictor': 10, 'Colors': color_channels, 'Columns': width}
         else:
             png = None
         self.beginStream(
@@ -1571,7 +1600,7 @@ def _writeImg(self, data, id, smask=None):
             png=png
             )
         if png:
-            self._writePng(data)
+            self.currentstream.write(png_data)
         else:
             self.currentstream.write(data.tobytes())
         self.endStream()
diff --git a/lib/matplotlib/tests/test_image.py b/lib/matplotlib/tests/test_image.py
@@ -732,7 +732,11 @@ def test_log_scale_image():
     ax.set(yscale='log')
 
 
-@image_comparison(['rotate_image'], remove_text=True)
+# Increased tolerance is needed for PDF test to avoid failure. After the PDF
+# backend was modified to use indexed color, there are ten pixels that differ
+# due to how the subpixel calculation is done when converting the PDF files to
+# PNG images.
+@image_comparison(['rotate_image'], remove_text=True, tol=0.35)
 def test_rotate_image():
     delta = 0.25
     x = y = np.arange(-3.0, 3.0, delta)