matplotlib · tacaswell · Oct 7, 2020 · Jul 4, 2020 · Jul 6, 2020
diff --git a/doc/users/next_whats_new/pdf_urls.rst b/doc/users/next_whats_new/pdf_urls.rst
@@ -0,0 +1,5 @@
+PDF supports URLs on ``Text`` artists
+-------------------------------------
+
+URLs on `.text.Text` artists (i.e., those set via `.Artist.set_url`) will now
+be saved as link annotations in PDF files.
diff --git a/lib/matplotlib/backends/backend_pdf.py b/lib/matplotlib/backends/backend_pdf.py
@@ -694,7 +694,13 @@ def __init__(self, filename, metadata=None):
 
         self.paths = []
 
-        self.pageAnnotations = []  # A list of annotations for the current page
+        # A list of annotations for each page. Each entry is a tuple of the
+        # overall Annots object reference that's inserted into the page object,
+        # followed by a list of the actual annotations.
+        self._annotations = []
+        # For annotations added before a page is created; mostly for the
+        # purpose of newTextnote.
+        self.pageAnnotations = []
 
         # The PDF spec recommends to include every procset
         procsets = [Name(x) for x in "PDF Text ImageB ImageC ImageI".split()]
@@ -720,6 +726,7 @@ def newPage(self, width, height):
 
         self.width, self.height = width, height
         contentObject = self.reserveObject('page contents')
+        annotsObject = self.reserveObject('annotations')
         thePage = {'Type': Name('Page'),
                    'Parent': self.pagesObject,
                    'Resources': self.resourceObject,
@@ -728,11 +735,12 @@ def newPage(self, width, height):
                    'Group': {'Type': Name('Group'),
                              'S': Name('Transparency'),
                              'CS': Name('DeviceRGB')},
-                   'Annots': self.pageAnnotations,
+                   'Annots': annotsObject,
                    }
         pageObject = self.reserveObject('page')
         self.writeObject(pageObject, thePage)
         self.pageList.append(pageObject)
+        self._annotations.append((annotsObject, self.pageAnnotations))
 
         self.beginStream(contentObject.id,
                          self.reserveObject('length of content stream'))
@@ -750,14 +758,13 @@ def newTextnote(self, text, positionRect=[-100, -100, 0, 0]):
                    'Contents': text,
                    'Rect': positionRect,
                    }
-        annotObject = self.reserveObject('annotation')
-        self.writeObject(annotObject, theNote)
-        self.pageAnnotations.append(annotObject)
+        self.pageAnnotations.append(theNote)
 
     def finalize(self):
         """Write out the various deferred objects and the pdf end matter."""
 
         self.endStream()
+        self._write_annotations()
         self.writeFonts()
         self.writeExtGSTates()
         self._write_soft_mask_groups()
@@ -816,6 +823,10 @@ def endStream(self):
             self.currentstream.end()
             self.currentstream = None
 
+    def _write_annotations(self):
+        for annots_ref, page_annots in self._annotations:  # one per page
+            self.writeObject(annots_ref, page_annots)  # fill reserved object
+
     def fontName(self, fontprop):
         """
         Select a font based on fontprop and return a name suitable for
@@ -2095,6 +2106,19 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
         width, height, descent, glyphs, rects = \
             self._text2path.mathtext_parser.parse(s, 72, prop)
 
+        if gc.get_url() is not None:
+            link_annotation = {
+                'Type': Name('Annot'),
+                'Subtype': Name('Link'),
+                'Rect': (x, y, x + width, y + height),
+                'Border': [0, 0, 0],
+                'A': {
+                    'S': Name('URI'),
+                    'URI': gc.get_url(),
+                },
+            }
+            self.file._annotations[-1][1].append(link_annotation)
+
         global_fonttype = mpl.rcParams['pdf.fonttype']
 
         # Set up a global transformation matrix for the whole math expression
@@ -2151,6 +2175,19 @@ def draw_tex(self, gc, x, y, s, prop, angle, ismath='TeX!', mtext=None):
         with dviread.Dvi(dvifile, 72) as dvi:
             page, = dvi
 
+        if gc.get_url() is not None:
+            link_annotation = {
+                'Type': Name('Annot'),
+                'Subtype': Name('Link'),
+                'Rect': (x, y, x + page.width, y + page.height),
+                'Border': [0, 0, 0],
+                'A': {
+                    'S': Name('URI'),
+                    'URI': gc.get_url(),
+                },
+            }
+            self.file._annotations[-1][1].append(link_annotation)
+
         # Gather font information and do some setup for combining
         # characters into strings. The variable seq will contain a
         # sequence of font and text entries. A font entry is a list
@@ -2250,6 +2287,21 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
             if is_opentype_cff_font(font.fname):
                 fonttype = 42
 
+        if gc.get_url() is not None:
+            font.set_text(s)
+            width, height = font.get_width_height()
+            link_annotation = {
+                'Type': Name('Annot'),
+                'Subtype': Name('Link'),
+                'Rect': (x, y, x + width / 64, y + height / 64),
+                'Border': [0, 0, 0],
+                'A': {
+                    'S': Name('URI'),
+                    'URI': gc.get_url(),
+                },
+            }
+            self.file._annotations[-1][1].append(link_annotation)
+
         # If fonttype != 3 or there are no multibyte characters, emit the whole
         # string at once.
         if fonttype != 3 or all(ord(char) <= 255 for char in s):

diff --git a/lib/matplotlib/tests/test_backend_pdf.py b/lib/matplotlib/tests/test_backend_pdf.py
@@ -1,4 +1,5 @@
 import datetime
+import decimal
 import io
 import os
 from pathlib import Path
@@ -212,6 +213,53 @@ def test_multipage_metadata(monkeypatch):
     }
 
 
+def test_text_urls():
+    """Check URLs on plain/mathtext Text are saved as PDF annotations."""
+    pikepdf = pytest.importorskip('pikepdf')
+
+    test_url = 'https://test_text_urls.matplotlib.org/'
+
+    fig = plt.figure(figsize=(2, 1))
+    fig.text(0.1, 0.1, 'test plain 123', url=f'{test_url}plain')
+    fig.text(0.1, 0.4, 'test mathtext $123$', url=f'{test_url}mathtext')
+
+    with io.BytesIO() as fd:
+        fig.savefig(fd, format='pdf')
+        with pikepdf.Pdf.open(fd) as pdf:
+            annots = pdf.pages[0].Annots
+            # Read Annots before the Pdf closes; later access may fail.
+            for y, fragment in [('0.1', 'plain'), ('0.4', 'mathtext')]:
+                annot = next(
+                    (a for a in annots if a.A.URI == f'{test_url}{fragment}'),
+                    None)
+                assert annot is not None
+                # Positions in points (72 per inch.)
+                assert annot.Rect[1] == decimal.Decimal(y) * 72
+
+
+@needs_usetex
+def test_text_urls_tex():
+    """Check that a URL on usetex Text is saved as a PDF annotation."""
+    pikepdf = pytest.importorskip('pikepdf')
+
+    test_url = 'https://test_text_urls.matplotlib.org/'
+
+    fig = plt.figure(figsize=(2, 1))
+    fig.text(0.1, 0.7, 'test tex $123$', usetex=True, url=f'{test_url}tex')
+
+    with io.BytesIO() as fd:
+        fig.savefig(fd, format='pdf')
+        with pikepdf.Pdf.open(fd) as pdf:
+            annots = pdf.pages[0].Annots
+            # Read Annots before the Pdf closes; later access may fail.
+            annot = next(
+                (a for a in annots if a.A.URI == f'{test_url}tex'),
+                None)
+            assert annot is not None
+            # Positions in points (72 per inch.)
+            assert annot.Rect[1] == decimal.Decimal('0.7') * 72
+
+
 def test_pdfpages_fspath():
     with PdfPages(Path(os.devnull)) as pdf:
         pdf.savefig(plt.figure())