Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 997df25

Browse files
committed
Make AFM parser both more compliant and less strict.
See changelog entry. Also support comma as decimal separator in the floating-point fields, as it is used in certain real-world files.
1 parent b9045cd commit 997df25

File tree

3 files changed

+45
-23
lines changed

3 files changed

+45
-23
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Changes in AFM parsing
2+
``````````````````````
3+
4+
In accordance with the AFM spec, the AFM parser no longer truncates the
5+
``UnderlinePosition`` and ``UnderlineThickness`` fields to integers.
6+
7+
The ``Notice`` field (which can only be publicly accessed by the deprecated
8+
``afm.parse_afm`` API) is no longer decoded to a `str`, but instead kept as
9+
`bytes`, to support non-conformant AFM files that use non-ASCII characters in
10+
that field.

lib/matplotlib/afm.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,24 @@
4949
_log = logging.getLogger(__name__)
5050

5151

52-
# some afm files have floats where we are expecting ints -- there is
53-
# probably a better way to handle this (support floats, round rather
54-
# than truncate). But I don't know what the best approach is now and
55-
# this change to _to_int should at least prevent mpl from crashing on
56-
# these JDH (2009-11-06)
57-
5852
def _to_int(x):
53+
# Some AFM files have floats where we are expecting ints -- there is
54+
# probably a better way to handle this (support floats, round rather
55+
# than truncate). But I don't know what the best approach is now and
56+
# this change to _to_int should at least prevent mpl from crashing on
57+
# these JDH (2009-11-06)
5958
return int(float(x))
6059

6160

62-
_to_float = float
61+
def _to_float(x):
62+
# Some AFM files use "," instead of "." as decimal separator -- this
63+
# shouldn't be ambiguous (unless someone is wicked enough to use "," as
64+
# thousands separator...).
65+
if isinstance(x, bytes):
66+
# Encoding doesn't really matter -- if we have codepoints >127 the call
67+
# to float() will error anyways.
68+
x = x.decode('latin-1')
69+
return float(x.replace(',', '.'))
6370

6471

6572
def _to_str(x):
@@ -84,18 +91,15 @@ def _to_bool(s):
8491

8592
def _sanity_check(fh):
8693
"""
87-
Check if the file at least looks like AFM.
88-
If not, raise `RuntimeError`.
94+
Check if the file looks like AFM; if it doesn't, raise `RuntimeError`.
8995
"""
90-
9196
# Remember the file position in case the caller wants to
9297
# do something else with the file.
9398
pos = fh.tell()
9499
try:
95100
line = next(fh)
96101
finally:
97102
fh.seek(pos, 0)
98-
99103
# AFM spec, Section 4: The StartFontMetrics keyword [followed by a
100104
# version number] must be the first line in the file, and the
101105
# EndFontMetrics keyword must be the last non-empty line in the
@@ -122,7 +126,7 @@ def _parse_header(fh):
122126
XHeight, Ascender, Descender, StartCharMetrics
123127
124128
"""
125-
headerConverters = {
129+
header_converters = {
126130
b'StartFontMetrics': _to_float,
127131
b'FontName': _to_str,
128132
b'FullName': _to_str,
@@ -131,10 +135,13 @@ def _parse_header(fh):
131135
b'ItalicAngle': _to_float,
132136
b'IsFixedPitch': _to_bool,
133137
b'FontBBox': _to_list_of_ints,
134-
b'UnderlinePosition': _to_int,
135-
b'UnderlineThickness': _to_int,
138+
b'UnderlinePosition': _to_float,
139+
b'UnderlineThickness': _to_float,
136140
b'Version': _to_str,
137-
b'Notice': _to_str,
141+
# Some AFM files have non-ASCII characters (which are not allowed by
142+
# the spec). Given that there is actually no public API to even access
143+
# this field, just return it as straight bytes.
144+
b'Notice': lambda x: x,
138145
b'EncodingScheme': _to_str,
139146
b'CapHeight': _to_float, # Is the second version a mistake, or
140147
b'Capheight': _to_float, # do some AFM files contain 'Capheight'? -JKS
@@ -162,13 +169,15 @@ def _parse_header(fh):
162169
val = b''
163170

164171
try:
165-
d[key] = headerConverters[key](val)
166-
except ValueError:
167-
_log.error('Value error parsing header in AFM: %s, %s', key, val)
168-
continue
172+
converter = header_converters[key]
169173
except KeyError:
170174
_log.error('Found an unknown keyword in AFM header (was %r)' % key)
171175
continue
176+
try:
177+
d[key] = converter(val)
178+
except ValueError:
179+
_log.error('Value error parsing header in AFM: %s, %s', key, val)
180+
continue
172181
if key == b'StartCharMetrics':
173182
return d
174183
raise RuntimeError('Bad parse')

lib/matplotlib/tests/test_afm.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from matplotlib import font_manager as fm
55

66

7+
# See note in afm.py re: use of comma as decimal separator in the
8+
# UnderlineThickness field and re: use of non-ASCII characters in the Notice
9+
# field.
710
AFM_TEST_DATA = b"""StartFontMetrics 2.0
811
Comment Comments are ignored.
912
Comment Creation Date:Mon Nov 13 12:34:11 GMT 2017
@@ -15,9 +18,9 @@
1518
ItalicAngle 0.0
1619
IsFixedPitch false
1720
UnderlinePosition -100
18-
UnderlineThickness 50
21+
UnderlineThickness 56,789
1922
Version 001.000
20-
Notice Copyright (c) 2017 No one.
23+
Notice Copyright \xa9 2017 No one.
2124
FontBBox 0 -321 1234 369
2225
StartCharMetrics 3
2326
C 0 ; WX 250 ; N space ; B 0 0 0 0 ;
@@ -51,9 +54,9 @@ def test_parse_header():
5154
b'ItalicAngle': 0.0,
5255
b'IsFixedPitch': False,
5356
b'UnderlinePosition': -100,
54-
b'UnderlineThickness': 50,
57+
b'UnderlineThickness': 56.789,
5558
b'Version': '001.000',
56-
b'Notice': 'Copyright (c) 2017 No one.',
59+
b'Notice': b'Copyright \xa9 2017 No one.',
5760
b'FontBBox': [0, -321, 1234, 369],
5861
b'StartCharMetrics': 3,
5962
}

0 commit comments

Comments
 (0)