diff --git a/doc/api/next_api_changes/behavior/20634-JKS.rst b/doc/api/next_api_changes/behavior/20634-JKS.rst
new file mode 100644
index 000000000000..ff4046445e42
--- /dev/null
+++ b/doc/api/next_api_changes/behavior/20634-JKS.rst
@@ -0,0 +1,8 @@
+``Type1Font`` objects now decrypt the encrypted part
+----------------------------------------------------
+
+Type 1 fonts have a large part of their code encrypted as an obsolete
+copy-protection measure. This part is now available decrypted as the
+``decrypted`` attribute of :class:`~matplotlib.type1font.Type1Font`.
+This decrypted data is not yet parsed, but this is a prerequisite for
+implementing subsetting.
diff --git a/lib/matplotlib/tests/test_type1font.py b/lib/matplotlib/tests/test_type1font.py
index 5766709c6cf8..99cc3e500b0e 100644
--- a/lib/matplotlib/tests/test_type1font.py
+++ b/lib/matplotlib/tests/test_type1font.py
@@ -15,6 +15,8 @@ def test_Type1Font():
     assert font.parts[2] == rawdata[0x8985:0x8ba6]
     assert font.parts[1:] == slanted.parts[1:]
     assert font.parts[1:] == condensed.parts[1:]
+    assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
+    assert font.decrypted.endswith(b'mark currentfile closefile\n')
 
     differ = difflib.Differ()
     diff = list(differ.compare(
@@ -67,3 +69,18 @@ def test_overprecision():
     assert matrix == '0.001 0 0.000167 0.001 0 0'
     # and here we had -9.48090361795083
     assert angle == '-9.4809'
+
+
+def test_encrypt_decrypt_roundtrip():
+    data = b'this is my plaintext \0\1\2\3'
+    encrypted = t1f.Type1Font._encrypt(data, 'eexec')
+    decrypted = t1f.Type1Font._decrypt(encrypted, 'eexec')
+    assert encrypted != decrypted
+    assert data == decrypted
+
+
+def test_encrypt_decrypt_integer_key():
+    data = b'this is my plaintext \0\1\2\3'
+    encrypted = t1f.Type1Font._encrypt(data, 55665)
+    assert encrypted == t1f.Type1Font._encrypt(data, 'eexec')
+    assert data == t1f.Type1Font._decrypt(encrypted, 55665)
diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py
index a9ae51ea5303..f417c0fc97a4 100644
--- a/lib/matplotlib/type1font.py
+++ b/lib/matplotlib/type1font.py
@@ -24,13 +24,16 @@
 import binascii
 import enum
 import itertools
+import logging
 import re
 import struct
 
 import numpy as np
 
 from matplotlib.cbook import _format_approx
+from . import _api
 
+_log = logging.getLogger(__name__)
 
 # token types
 _TokenType = enum.Enum('_TokenType',
@@ -46,10 +49,12 @@ class Type1Font:
     parts : tuple
         A 3-tuple of the cleartext part, the encrypted part, and the finale of
         zeros.
+    decrypted : bytes
+        The decrypted form of parts[1].
     prop : dict[str, Any]
         A dictionary of font properties.
     """
-    __slots__ = ('parts', 'prop')
+    __slots__ = ('parts', 'decrypted', 'prop')
 
     def __init__(self, input):
         """
@@ -68,6 +73,7 @@ def __init__(self, input):
                 data = self._read(file)
             self.parts = self._split(data)
 
+        self.decrypted = self._decrypt(self.parts[1], 'eexec')
         self._parse()
 
     def _read(self, file):
@@ -125,13 +131,16 @@ def _split(self, data):
                 zeros -= 1
             idx -= 1
         if zeros:
-            raise RuntimeError('Insufficiently many zeros in Type 1 font')
+            # this may have been a problem on old implementations that
+            # used the zeros as necessary padding
+            _log.info('Insufficiently many zeros in Type 1 font')
 
         # Convert encrypted part to binary (if we read a pfb file, we may end
         # up converting binary to hexadecimal to binary again; but if we read
         # a pfa file, this part is already in hex, and I am not quite sure if
         # even the pfb format guarantees that it will be in binary).
-        binary = binascii.unhexlify(data[len1:idx+1])
+        idx1 = len1 + ((idx - len1 + 2) & ~1)  # ensure an even number of bytes
+        binary = binascii.unhexlify(data[len1:idx1])
 
         return data[:len1], binary, data[idx+1:]
 
@@ -139,6 +148,79 @@ def _split(self, data):
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
     _instring_re = re.compile(br'[()\\]')
 
+    @staticmethod
+    def _decrypt(ciphertext, key, ndiscard=4):
+        """
+        Decrypt ciphertext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+
+        Parameters
+        ----------
+        ciphertext : bytes
+            The encrypted data.
+        key : int or {'eexec', 'charstring'}
+            The initial 16-bit key of the cipher, or the name of the part
+            of a Type-1 font whose standard key should be used.
+        ndiscard : int, default: 4
+            The number of bytes discarded from the beginning of the
+            plaintext.
+
+        Returns
+        -------
+        bytes
+            The plaintext without its first *ndiscard* bytes.
+        """
+
+        if not isinstance(key, int):
+            # named keys are looked up; integer keys are used as given
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        plaintext = []
+        for byte in ciphertext:
+            plaintext.append(byte ^ (key >> 8))
+            key = ((key+byte) * 52845 + 22719) & 0xffff
+
+        return bytes(plaintext[ndiscard:])
+
+    @staticmethod
+    def _encrypt(plaintext, key, ndiscard=4):
+        """
+        Encrypt plaintext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+
+        Parameters
+        ----------
+        plaintext : bytes
+            The data to encrypt.
+        key : int or {'eexec', 'charstring'}
+            The initial 16-bit key of the cipher, or the name of the part
+            of a Type-1 font whose standard key should be used.
+        ndiscard : int, default: 4
+            The number of bytes prepended to the plaintext before
+            encryption. This function prepends NUL bytes for
+            reproducibility, even though the original algorithm uses
+            random bytes, presumably to avoid cryptanalysis.
+
+        Returns
+        -------
+        bytes
+            The ciphertext, *ndiscard* bytes longer than *plaintext*.
+        """
+
+        if not isinstance(key, int):
+            # named keys are looked up; integer keys are used as given
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        ciphertext = []
+        for byte in b'\0' * ndiscard + plaintext:
+            c = byte ^ (key >> 8)
+            ciphertext.append(c)
+            key = ((key + c) * 52845 + 22719) & 0xffff
+
+        return bytes(ciphertext)
+
     @classmethod
     def _tokens(cls, text):
         """