From 3863a75a9dbe01ec3cade0bdd6f64bf40d435fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Mon, 12 Jul 2021 20:00:49 +0300 Subject: [PATCH 1/2] Implement Type-1 decryption This is a prerequisite of subsetting. --- .../next_api_changes/behavior/20634-JKS.rst | 8 +++ lib/matplotlib/tests/test_type1font.py | 10 ++++ lib/matplotlib/type1font.py | 54 ++++++++++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 doc/api/next_api_changes/behavior/20634-JKS.rst diff --git a/doc/api/next_api_changes/behavior/20634-JKS.rst b/doc/api/next_api_changes/behavior/20634-JKS.rst new file mode 100644 index 000000000000..ff4046445e42 --- /dev/null +++ b/doc/api/next_api_changes/behavior/20634-JKS.rst @@ -0,0 +1,8 @@ +``Type1Font`` objects now decrypt the encrypted part +---------------------------------------------------- + +Type 1 fonts have a large part of their code encrypted as an obsolete +copy-protection measure. This part is now available decrypted as the +``decrypted`` attribute of :class:`~matplotlib.type1font.Type1Font`. +This decrypted data is not yet parsed, but this is a prerequisite for +implementing subsetting. diff --git a/lib/matplotlib/tests/test_type1font.py b/lib/matplotlib/tests/test_type1font.py index 5766709c6cf8..99cc3e500b0e 100644 --- a/lib/matplotlib/tests/test_type1font.py +++ b/lib/matplotlib/tests/test_type1font.py @@ -15,6 +15,8 @@ def test_Type1Font(): assert font.parts[2] == rawdata[0x8985:0x8ba6] assert font.parts[1:] == slanted.parts[1:] assert font.parts[1:] == condensed.parts[1:] + assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin') + assert font.decrypted.endswith(b'mark currentfile closefile\n') differ = difflib.Differ() diff = list(differ.compare( @@ -67,3 +69,11 @@ def test_overprecision(): assert matrix == '0.001 0 0.000167 0.001 0 0' # and here we had -9.48090361795083 assert angle == '-9.4809' + + +def test_encrypt_decrypt_roundtrip(): + data = b'this is my plaintext \0\1\2\3' + encrypted = t1f.Type1Font._encrypt(data, 'eexec') + decrypted = t1f.Type1Font._decrypt(encrypted, 'eexec') + assert encrypted != decrypted + assert data == decrypted diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py index a9ae51ea5303..8ae4942637a6 100644 --- a/lib/matplotlib/type1font.py +++ b/lib/matplotlib/type1font.py @@ -30,6 +30,7 @@ import numpy as np from matplotlib.cbook import _format_approx +from . import _api # token types @@ -46,10 +47,12 @@ class Type1Font: parts : tuple A 3-tuple of the cleartext part, the encrypted part, and the finale of zeros. + decrypted : bytes + The decrypted form of parts[1]. prop : dict[str, Any] A dictionary of font properties. """ - __slots__ = ('parts', 'prop') + __slots__ = ('parts', 'decrypted', 'prop') def __init__(self, input): """ @@ -68,6 +71,7 @@ def __init__(self, input): data = self._read(file) self.parts = self._split(data) + self.decrypted = self._decrypt(self.parts[1], 'eexec') self._parse() def _read(self, file): @@ -139,6 +143,54 @@ def _split(self, data): _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+') _instring_re = re.compile(br'[()\\]') + @staticmethod + def _decrypt(ciphertext, key, ndiscard=4): + """ + Decrypt ciphertext using the Type-1 font algorithm + + The algorithm is described in Adobe's "Adobe Type 1 Font Format". + The key argument can be an integer, or one of the strings + 'eexec' and 'charstring', which map to the key specified for the + corresponding part of Type-1 fonts. + + The ndiscard argument should be an integer, usually 4. + That number of bytes is discarded from the beginning of plaintext. + """ + + key = _api.check_getitem({'eexec': 55665, 'charstring': 4330}, key=key) + plaintext = [] + for byte in ciphertext: + plaintext.append(byte ^ (key >> 8)) + key = ((key+byte) * 52845 + 22719) & 0xffff + + return bytes(plaintext[ndiscard:]) + + @staticmethod + def _encrypt(plaintext, key, ndiscard=4): + """ + Encrypt plaintext using the Type-1 font algorithm + + The algorithm is described in Adobe's "Adobe Type 1 Font Format". + The key argument can be an integer, or one of the strings + 'eexec' and 'charstring', which map to the key specified for the + corresponding part of Type-1 fonts. + + The ndiscard argument should be an integer, usually 4. That + number of bytes is prepended to the plaintext before encryption. + This function prepends NUL bytes for reproducibility, even though + the original algorithm uses random bytes, presumably to avoid + cryptanalysis. + """ + + key = _api.check_getitem({'eexec': 55665, 'charstring': 4330}, key=key) + ciphertext = [] + for byte in b'\0' * ndiscard + plaintext: + c = byte ^ (key >> 8) + ciphertext.append(c) + key = ((key + c) * 52845 + 22719) & 0xffff + + return bytes(ciphertext) + @classmethod def _tokens(cls, text): """ From e095ccd853cf557672db53dd991a8664349c1981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20K=2E=20Sepp=C3=A4nen?= Date: Thu, 22 Jul 2021 14:42:04 +0300 Subject: [PATCH 2/2] Fix two crashes on slightly broken font files The zeros at the end are not needed by our implementation so don't crash if there are too few. Always pass an even number of bytes to unhexlify. --- lib/matplotlib/type1font.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py index 8ae4942637a6..f417c0fc97a4 100644 --- a/lib/matplotlib/type1font.py +++ b/lib/matplotlib/type1font.py @@ -24,6 +24,7 @@ import binascii import enum import itertools +import logging import re import struct @@ -32,6 +33,7 @@ from matplotlib.cbook import _format_approx from . import _api +_log = logging.getLogger(__name__) # token types _TokenType = enum.Enum('_TokenType', @@ -129,13 +131,16 @@ def _split(self, data): zeros -= 1 idx -= 1 if zeros: - raise RuntimeError('Insufficiently many zeros in Type 1 font') + # this may have been a problem on old implementations that + # used the zeros as necessary padding + _log.info('Insufficiently many zeros in Type 1 font') # Convert encrypted part to binary (if we read a pfb file, we may end # up converting binary to hexadecimal to binary again; but if we read # a pfa file, this part is already in hex, and I am not quite sure if # even the pfb format guarantees that it will be in binary). - binary = binascii.unhexlify(data[len1:idx+1]) + idx1 = len1 + ((idx - len1 + 2) & ~1) # ensure an even number of bytes + binary = binascii.unhexlify(data[len1:idx1]) return data[:len1], binary, data[idx+1:]