@@ -103,6 +103,57 @@ def font_as_file(font):
103103 return fh
104104
105105
class GlyphMap:
    """
    A two-way glyph mapping.

    The forward glyph map is from (character string, glyph index)-pairs to
    (subset index, subset character code)-pairs.

    The inverse glyph map is from (subset index, subset character code)-pairs to
    (character string, glyph index)-pairs.
    """

    def __init__(self) -> None:
        # The "character" half of each pair is a string (possibly several
        # characters long), not an integer character code.
        self._forward: dict[tuple[str, GlyphIndexType],
                            tuple[int, CharacterCodeType]] = {}
        self._inverse: dict[tuple[int, CharacterCodeType],
                            tuple[str, GlyphIndexType]] = {}

    def get(self, charcodes: str,
            glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
        """
        Get the forward mapping from a (character string, glyph index)-pair.

        This may return *None* if the pair is not currently mapped.
        """
        return self._forward.get((charcodes, glyph_index))

    def iget(self, subset: int,
             subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]:
        """
        Get the inverse mapping from a (subset, subset charcode)-pair.

        Raises
        ------
        KeyError
            If the pair has not been recorded with `add`.
        """
        return self._inverse[(subset, subset_charcode)]

    def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int,
            subset_charcode: CharacterCodeType) -> None:
        """
        Add a mapping to this instance.

        Both directions are updated; an existing entry under the same key is
        overwritten.

        Parameters
        ----------
        charcode : str
            The character string to record.
        glyph_index : GlyphIndexType
            The corresponding glyph index to record.
        subset : int
            The subset in which the subset character code resides.
        subset_charcode : CharacterCodeType
            The subset character code within the above subset.
        """
        self._forward[(charcode, glyph_index)] = (subset, subset_charcode)
        self._inverse[(subset, subset_charcode)] = (charcode, glyph_index)
156+
106157class CharacterTracker :
107158 """
108159 Helper for font subsetting by the PDF and PS backends.
@@ -114,16 +165,20 @@ class CharacterTracker:
114165 ----------
115166 subset_size : int
116167 The size at which characters are grouped into subsets.
117- used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
168+ used : dict
118169 A dictionary of font files to character maps.
119170
120- The key is a font filename and subset within that font .
171+ The key is a font filename.
121172
122- The value is a dictionary mapping a character code to a glyph index. Note this
123- mapping is the inverse of FreeType, which maps glyph indices to character codes.
173+ The value is a list of dictionaries, each mapping at most *subset_size*
174+ character codes to glyph indices. Note this mapping is the inverse of FreeType,
175+ which maps glyph indices to character codes.
124176
125177 If *subset_size* is not set, then there will only be one subset per font
126178 filename.
179+ glyph_maps : dict
180+ A dictionary of font files to glyph maps. You probably will want to use the
181+ `.subset_to_unicode` method instead of this attribute.
127182 """
128183
def __init__(self, subset_size: int = 0):
    """
    Parameters
    ----------
    subset_size : int, optional
        The maximum size that is supported for an embedded font. When given,
        tracked characters are grouped into subsets of this size.
    """
    self.subset_size = subset_size
    # Font filename -> two-way map between tracked (characters, glyph index)
    # pairs and their (subset, subset character code) assignment.
    self.glyph_maps: dict[str, GlyphMap] = {}
    # Font filename -> list of subsets, each a {character code: glyph index} map.
    self.used: dict[str, list[dict[CharacterCodeType, GlyphIndexType]]] = {}
139195
def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
    """
    Record every character of string *s* as being used with font *font*.

    Each character is forwarded to `track_glyph` together with the font and
    glyph index that the font map reports for it.

    Parameters
    ----------
    font : FT2Font
        A font that is being used for the provided string.
    s : str
        The string whose characters should be recorded.

    Returns
    -------
    list[tuple[int, CharacterCodeType]]
        One (subset, subset character code)-pair per entry of the font map.
        If *subset_size* is not specified, then the subset will always be 0
        and the character codes will be returned from the string unchanged.
    """
    tracked = []
    # NOTE(review): _get_fontmap presumably maps each distinct character of *s*
    # to the font that supplies its glyph -- confirm against FT2Font.
    for char, char_font in font._get_fontmap(s).items():
        codepoint = ord(char)
        glyph_index = char_font.get_char_index(codepoint)
        tracked.append(self.track_glyph(char_font, codepoint, glyph_index))
    return tracked
220+
def track_glyph(self, font: FT2Font, chars: str | CharacterCodeType,
                glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
    """
    Record character(s) *chars* at glyph index *glyph* as using font *font*.

    Tracking the same (characters, glyph index)-pair for the same font again
    returns the previously assigned result without recording anything new.

    Parameters
    ----------
    font : FT2Font
        A font that is being used for the provided string.
    chars : str or CharacterCodeType
        The character(s) to record. This may be a single character code, or multiple
        characters in a string, if the glyph maps to several characters. It will be
        normalized to a string internally.
    glyph : GlyphIndexType
        The corresponding glyph index to record.

    Returns
    -------
    subset : int
        The subset in which the returned character code resides. Multi-character
        strings never go in subset 0.
    subset_charcode : CharacterCodeType
        The character code within the above subset. If *subset_size* was not
        specified on this instance and *chars* is a single character, then this
        is just its character code unmodified.
    """
    if isinstance(chars, str):
        # Placement in the first subset is decided by the first character only.
        charcode = ord(chars[0])
    else:
        charcode = chars
        chars = chr(chars)

    glyph_map = self.glyph_maps.setdefault(font.fname, GlyphMap())
    # Compare against None explicitly rather than relying on tuple truthiness.
    cached = glyph_map.get(chars, glyph)
    if cached is not None:
        return cached

    subset_maps = self.used.setdefault(font.fname, [{}])
    use_next_charmap = (
        # Multi-character glyphs always go in the non-0 subset.
        len(chars) > 1
        # Otherwise, default to preserving the character code as it was.
        or (
            self.subset_size != 0
            and (
                # But start filling a new subset if outside the first block; this
                # preserves ASCII (for Type 3) or the Basic Multilingual Plane (for
                # Type 42).
                charcode >= self.subset_size
                # Or, use a new subset if the character code is already mapped for
                # the first block. This means it's using an alternate glyph.
                or charcode in subset_maps[0]
            )
        )
    )
    if use_next_charmap:
        # Open a fresh subset when only subset 0 exists so far, or when the
        # latest overflow subset is already full.
        if len(subset_maps) == 1 or len(subset_maps[-1]) == self.subset_size:
            subset_maps.append({})
        subset = len(subset_maps) - 1
        # Overflow subsets are filled sequentially from code 0.
        subset_charcode = len(subset_maps[-1])
    else:
        subset = 0
        subset_charcode = charcode
    subset_maps[subset][subset_charcode] = glyph
    glyph_map.add(chars, glyph, subset, subset_charcode)
    return (subset, subset_charcode)
207283
def subset_to_unicode(self, fontname: str, subset: int,
                      subset_charcode: CharacterCodeType) -> str:
    """
    Map a subset index and subset character code back to Unicode character(s).

    Parameters
    ----------
    fontname : str
        The name of the font, from the *used* dictionary key.
    subset : int
        The subset index within a font.
    subset_charcode : CharacterCodeType
        The character code within a subset to map back.

    Returns
    -------
    str
        The Unicode character(s) corresponding to the subsetted character code.
    """
    font_glyph_map = self.glyph_maps[fontname]
    # The inverse entry is a (characters, glyph index) pair; only the
    # characters are of interest here.
    characters, _glyph_index = font_glyph_map.iget(subset, subset_charcode)
    return characters
226304
227305
228306class RendererPDFPSBase (RendererBase ):
0 commit comments