Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5a01c86

Browse files
authored
Merge pull request #17221 from QuLogic/svg-metadata
ENH: Add metadata saving support to SVG.
2 parents f42b24f + 207013d commit 5a01c86

4 files changed

Lines changed: 246 additions & 21 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Saving SVG now supports adding metadata
2+
---------------------------------------
3+
4+
When saving SVG files, metadata can now be passed which will be saved in the
5+
file using `Dublin Core`_ and `RDF`_. A list of valid metadata can be found in
6+
the documentation for `.FigureCanvasSVG.print_svg`.
7+
8+
.. _Dublin Core: https://www.dublincore.org/specifications/dublin-core/
9+
.. _RDF: https://www.w3.org/1999/.status/PR-rdf-syntax-19990105/status

lib/matplotlib/backends/backend_svg.py

Lines changed: 145 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from collections import OrderedDict
22
import base64
3+
import datetime
34
import gzip
45
import hashlib
56
from io import BytesIO, StringIO, TextIOWrapper
67
import itertools
78
import logging
9+
import os
810
import re
911
import uuid
1012

@@ -17,6 +19,7 @@
1719
_Backend, FigureCanvasBase, FigureManagerBase, RendererBase)
1820
from matplotlib.backends.backend_mixed import MixedModeRenderer
1921
from matplotlib.colors import rgb2hex
22+
from matplotlib.dates import UTC
2023
from matplotlib.font_manager import findfont, get_font
2124
from matplotlib.ft2font import LOAD_NO_HINTING
2225
from matplotlib.mathtext import MathTextParser
@@ -273,7 +276,8 @@ def generate_css(attrib={}):
273276

274277

275278
class RendererSVG(RendererBase):
276-
def __init__(self, width, height, svgwriter, basename=None, image_dpi=72):
279+
def __init__(self, width, height, svgwriter, basename=None, image_dpi=72,
280+
*, metadata=None):
277281
self.width = width
278282
self.height = height
279283
self.writer = XMLWriter(svgwriter)
@@ -304,6 +308,7 @@ def __init__(self, width, height, svgwriter, basename=None, image_dpi=72):
304308
xmlns="http://www.w3.org/2000/svg",
305309
version="1.1",
306310
attrib={'xmlns:xlink': "http://www.w3.org/1999/xlink"})
311+
self._write_metadata(metadata)
307312
self._write_default_style()
308313

309314
def finalize(self):
@@ -312,6 +317,112 @@ def finalize(self):
312317
self.writer.close(self._start_id)
313318
self.writer.flush()
314319

320+
def _write_metadata(self, metadata):
    """
    Write the ``<title>`` element and the RDF ``<metadata>`` block.

    Parameters
    ----------
    metadata : dict or None
        User-supplied metadata, keyed by capitalized Dublin Core term
        (e.g. ``'Title'``, ``'Creator'``, ``'Date'``, ``'Keywords'``).
        Defaults are filled in for ``'Format'``, ``'Type'``, ``'Creator'``
        and ``'Date'``; passing any of these keys explicitly with a value
        of ``None`` suppresses the corresponding entry.

    Raises
    ------
    ValueError
        If ``'Date'`` is not a str, date, datetime, or an iterable of
        those, or if *metadata* contains keys that are not handled here.
    """
    # Add metadata following the Dublin Core Metadata Initiative, and the
    # Creative Commons Rights Expression Language. This is mainly for
    # compatibility with Inkscape.
    if metadata is None:
        metadata = {}
    # User-supplied entries override the defaults; the result is a new dict,
    # so the caller's mapping is never mutated by the pops below.
    metadata = {
        'Format': 'image/svg+xml',
        'Type': 'http://purl.org/dc/dcmitype/StillImage',
        'Creator':
            f'Matplotlib v{mpl.__version__}, https://matplotlib.org/',
        **metadata
    }
    writer = self.writer

    # A title of None means "no title", so do not emit an empty element.
    title = metadata.get('Title', None)
    if title is not None:
        writer.element('title', text=title, indent=False)

    # Special handling.
    date = metadata.get('Date', None)
    if date is not None:
        if isinstance(date, str):
            dates = [date]
        elif isinstance(date, (datetime.datetime, datetime.date)):
            dates = [date.isoformat()]
        elif np.iterable(date):
            dates = []
            for d in date:
                if isinstance(d, str):
                    dates.append(d)
                elif isinstance(d, (datetime.datetime, datetime.date)):
                    dates.append(d.isoformat())
                else:
                    raise ValueError(
                        'Invalid type for Date metadata. '
                        'Expected iterable of str, date, or datetime, '
                        'not {!r}.'.format(type(d)))
        else:
            raise ValueError('Invalid type for Date metadata. '
                             'Expected str, date, datetime, or iterable '
                             'of the same, not {!r}.'.format(type(date)))
        # Multiple dates are joined with '/' into a single dc:date value.
        metadata['Date'] = '/'.join(dates)
    elif 'Date' not in metadata:
        # Only fall back to a default when the key is absent; an explicit
        # ``'Date': None`` is a request to omit the date altogether.
        # Get source date from SOURCE_DATE_EPOCH, if set.
        # See https://reproducible-builds.org/specs/source-date-epoch/
        source_date_epoch = os.getenv("SOURCE_DATE_EPOCH")
        if source_date_epoch:
            # Build a timezone-aware UTC datetime directly rather than
            # going through the deprecated naive ``utcfromtimestamp``.
            date = datetime.datetime.fromtimestamp(
                int(source_date_epoch), datetime.timezone.utc)
            metadata['Date'] = date.isoformat()
        else:
            metadata['Date'] = datetime.datetime.today().isoformat()

    mid = writer.start('metadata')
    writer.start('rdf:RDF', attrib={
        'xmlns:dc': "http://purl.org/dc/elements/1.1/",
        'xmlns:cc': "http://creativecommons.org/ns#",
        'xmlns:rdf': "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    })
    writer.start('cc:Work')

    # The type is written as an rdf:resource attribute, not as text.
    uri = metadata.pop('Type', None)
    if uri is not None:
        writer.element('dc:type', attrib={'rdf:resource': uri})

    # Single value only.
    for key in ['title', 'coverage', 'date', 'description', 'format',
                'identifier', 'language', 'relation', 'source']:
        info = metadata.pop(key.title(), None)
        if info is not None:
            writer.element(f'dc:{key}', text=info, indent=False)

    # Multiple Agent values.
    for key in ['creator', 'contributor', 'publisher', 'rights']:
        agents = metadata.pop(key.title(), None)
        if agents is None:
            continue

        # A bare string is a single agent, not an iterable of characters.
        if isinstance(agents, str):
            agents = [agents]

        writer.start(f'dc:{key}')
        for agent in agents:
            writer.start('cc:Agent')
            writer.element('dc:title', text=agent, indent=False)
            writer.end('cc:Agent')
        writer.end(f'dc:{key}')

    # Multiple values.
    keywords = metadata.pop('Keywords', None)
    if keywords is not None:
        if isinstance(keywords, str):
            keywords = [keywords]

        writer.start('dc:subject')
        writer.start('rdf:Bag')
        for keyword in keywords:
            writer.element('rdf:li', text=keyword, indent=False)
        writer.end('rdf:Bag')
        writer.end('dc:subject')

    writer.close(mid)

    # Every recognized key has been popped above; anything left is unknown.
    if metadata:
        raise ValueError('Unknown metadata key(s) passed to SVG writer: ' +
                         ','.join(metadata))
315426
def _write_default_style(self):
316427
writer = self.writer
317428
default_style = generate_css({
@@ -1163,6 +1274,36 @@ class FigureCanvasSVG(FigureCanvasBase):
11631274
fixed_dpi = 72
11641275

11651276
def print_svg(self, filename, *args, **kwargs):
1277+
"""
1278+
Parameters
1279+
----------
1280+
filename : str or path-like or file-like
1281+
Output target; if a string, a file will be opened for writing.
1282+
metadata : Dict[str, Any], optional
1283+
Metadata in the SVG file defined as key-value pairs of strings,
1284+
datetimes, or lists of strings, e.g., ``{'Creator': 'My software',
1285+
'Contributor': ['Me', 'My Friend'], 'Title': 'Awesome'}``.
1286+
1287+
The standard keys and their value types are:
1288+
1289+
* *str*: ``'Coverage'``, ``'Description'``, ``'Format'``,
1290+
``'Identifier'``, ``'Language'``, ``'Relation'``, ``'Source'``,
1291+
``'Title'``, and ``'Type'``.
1292+
* *str* or *list of str*: ``'Contributor'``, ``'Creator'``,
1293+
``'Keywords'``, ``'Publisher'``, and ``'Rights'``.
1294+
* *str*, *date*, *datetime*, or *tuple* of same: ``'Date'``. If a
1295+
non-*str*, then it will be formatted as ISO 8601.
1296+
1297+
Values have been predefined for ``'Creator'``, ``'Date'``,
1298+
``'Format'``, and ``'Type'``. They can be removed by setting them
1299+
to `None`.
1300+
1301+
Information is encoded as `Dublin Core Metadata`__.
1302+
1303+
.. _DC: https://www.dublincore.org/specifications/dublin-core/
1304+
1305+
__ DC_
1306+
"""
11661307
with cbook.open_file_cm(filename, "w", encoding="utf-8") as fh:
11671308

11681309
filename = getattr(fh, 'name', '')
@@ -1187,15 +1328,15 @@ def print_svgz(self, filename, *args, **kwargs):
11871328
gzip.GzipFile(mode='w', fileobj=fh) as gzipwriter:
11881329
return self.print_svg(gzipwriter)
11891330

1190-
def _print_svg(
1191-
self, filename, fh, *, dpi=72, bbox_inches_restore=None, **kwargs):
1331+
def _print_svg(self, filename, fh, *, dpi=72, bbox_inches_restore=None,
1332+
metadata=None, **kwargs):
11921333
self.figure.set_dpi(72.0)
11931334
width, height = self.figure.get_size_inches()
11941335
w, h = width * 72, height * 72
11951336

11961337
renderer = MixedModeRenderer(
11971338
self.figure, width, height, dpi,
1198-
RendererSVG(w, h, fh, filename, dpi),
1339+
RendererSVG(w, h, fh, filename, dpi, metadata=metadata),
11991340
bbox_inches_restore=bbox_inches_restore)
12001341

12011342
self.figure.draw(renderer)

lib/matplotlib/figure.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,6 +2081,8 @@ def savefig(self, fname, *, transparent=None, **kwargs):
20812081
`~.FigureCanvasAgg.print_png`.
20822082
- 'pdf' with pdf backend: See the parameter ``metadata`` of
20832083
`~.backend_pdf.PdfPages`.
2084+
- 'svg' with svg backend: See the parameter ``metadata`` of
2085+
`~.FigureCanvasSVG.print_svg`.
20842086
- 'eps' and 'ps' with PS backend: Only 'Creator' is supported.
20852087
20862088
pil_kwargs : dict, optional

lib/matplotlib/tests/test_backend_svg.py

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import datetime
12
from io import BytesIO
23
import re
34
import tempfile
5+
import xml.etree.ElementTree
46
import xml.parsers.expat
57

68
import numpy as np
@@ -29,12 +31,9 @@ def test_visibility():
2931
for artist in b:
3032
artist.set_visible(False)
3133

32-
fd = BytesIO()
33-
fig.savefig(fd, format='svg')
34-
35-
fd.seek(0)
36-
buf = fd.read()
37-
fd.close()
34+
with BytesIO() as fd:
35+
fig.savefig(fd, format='svg')
36+
buf = fd.getvalue()
3837

3938
parser = xml.parsers.expat.ParserCreate()
4039
parser.Parse(buf) # this will raise ExpatError if the svg is invalid
@@ -63,11 +62,9 @@ def test_text_urls():
6362
test_url = "http://test_text_urls.matplotlib.org"
6463
fig.suptitle("test_text_urls", url=test_url)
6564

66-
fd = BytesIO()
67-
fig.savefig(fd, format='svg')
68-
fd.seek(0)
69-
buf = fd.read().decode()
70-
fd.close()
65+
with BytesIO() as fd:
66+
fig.savefig(fd, format='svg')
67+
buf = fd.getvalue().decode()
7168

7269
expected = '<a xlink:href="{0}">'.format(test_url)
7370
assert expected in buf
@@ -175,11 +172,9 @@ def test_gid():
175172
gdic[gid] = obj
176173
obj.set_gid(gid)
177174

178-
fd = BytesIO()
179-
fig.savefig(fd, format='svg')
180-
fd.seek(0)
181-
buf = fd.read().decode()
182-
fd.close()
175+
with BytesIO() as fd:
176+
fig.savefig(fd, format='svg')
177+
buf = fd.getvalue().decode()
183178

184179
def include(gid, obj):
185180
# we need to exclude certain objects which will not appear in the svg
@@ -236,7 +231,9 @@ def test_url():
236231
assert b'http://example.com/' + v in b
237232

238233

239-
def test_url_tick():
234+
def test_url_tick(monkeypatch):
235+
monkeypatch.setenv('SOURCE_DATE_EPOCH', '19680801')
236+
240237
fig1, ax = plt.subplots()
241238
ax.scatter([1, 2, 3], [4, 5, 6])
242239
for i, tick in enumerate(ax.yaxis.get_major_ticks()):
@@ -259,3 +256,79 @@ def test_url_tick():
259256
for i in range(len(ax.yaxis.get_major_ticks())):
260257
assert f'http://example.com/{i}'.encode('ascii') in b1
261258
assert b1 == b2
259+
260+
261+
def test_svg_default_metadata(monkeypatch):
262+
# Values have been predefined for 'Creator', 'Date', 'Format', and 'Type'.
263+
monkeypatch.setenv('SOURCE_DATE_EPOCH', '19680801')
264+
265+
fig, ax = plt.subplots()
266+
with BytesIO() as fd:
267+
fig.savefig(fd, format='svg')
268+
buf = fd.getvalue().decode()
269+
270+
# Creator
271+
assert mpl.__version__ in buf
272+
# Date
273+
assert '1970-08-16' in buf
274+
# Format
275+
assert 'image/svg+xml' in buf
276+
# Type
277+
assert 'StillImage' in buf
278+
279+
280+
def test_svg_metadata():
281+
single_value = ['Coverage', 'Identifier', 'Language', 'Relation', 'Source',
282+
'Title', 'Type']
283+
multi_value = ['Contributor', 'Creator', 'Keywords', 'Publisher', 'Rights']
284+
metadata = {
285+
'Date': [datetime.date(1968, 8, 1),
286+
datetime.datetime(1968, 8, 2, 1, 2, 3)],
287+
'Description': 'description\ntext',
288+
**{k: f'{k} foo' for k in single_value},
289+
**{k: [f'{k} bar', f'{k} baz'] for k in multi_value},
290+
}
291+
292+
fig, ax = plt.subplots()
293+
with BytesIO() as fd:
294+
fig.savefig(fd, format='svg', metadata=metadata)
295+
buf = fd.getvalue().decode()
296+
297+
SVGNS = '{http://www.w3.org/2000/svg}'
298+
RDFNS = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'
299+
CCNS = '{http://creativecommons.org/ns#}'
300+
DCNS = '{http://purl.org/dc/elements/1.1/}'
301+
302+
root = xml.etree.ElementTree.fromstring(buf)
303+
rdf, = root.findall(f'./{SVGNS}metadata/{RDFNS}RDF')
304+
305+
# Check things that are single entries.
306+
titles = [node.text for node in root.findall(f'./{SVGNS}title')]
307+
assert titles == [metadata['Title']]
308+
types = [node.attrib[f'{RDFNS}resource']
309+
for node in rdf.findall(f'./{CCNS}Work/{DCNS}type')]
310+
assert types == [metadata['Type']]
311+
for k in ['Description', *single_value]:
312+
if k == 'Type':
313+
continue
314+
values = [node.text
315+
for node in rdf.findall(f'./{CCNS}Work/{DCNS}{k.lower()}')]
316+
assert values == [metadata[k]]
317+
318+
# Check things that are multi-value entries.
319+
for k in multi_value:
320+
if k == 'Keywords':
321+
continue
322+
values = [
323+
node.text
324+
for node in rdf.findall(
325+
f'./{CCNS}Work/{DCNS}{k.lower()}/{CCNS}Agent/{DCNS}title')]
326+
assert values == metadata[k]
327+
328+
# Check special things.
329+
dates = [node.text for node in rdf.findall(f'./{CCNS}Work/{DCNS}date')]
330+
assert dates == ['1968-08-01/1968-08-02T01:02:03']
331+
332+
values = [node.text for node in
333+
rdf.findall(f'./{CCNS}Work/{DCNS}subject/{RDFNS}Bag/{RDFNS}li')]
334+
assert values == metadata['Keywords']

0 commit comments

Comments
 (0)