@@ -169,43 +169,64 @@ def wrapper(self, byte):
169
169
class Dvi (object ):
170
170
"""
171
171
A reader for a dvi ("device-independent") file, as produced by TeX.
172
- The current implementation can only iterate through pages in order,
173
- and does not even attempt to verify the postamble.
172
+ The current implementation can only iterate through pages in order.
174
173
175
174
This class can be used as a context manager to close the underlying
176
175
file upon exit. Pages can be read via iteration. Here is an overly
177
176
simple way to extract text without trying to detect whitespace::
178
177
179
178
>>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
...     for page in dvi:
...         print(''.join(chr(t.glyph) for t in page.text))
181
+
182
+ Parameters
183
+ ----------
184
+
185
+ filename : str
186
+ dvi file to read
187
+ dpi : number or None
188
+ Dots per inch, can be floating-point; this affects the
189
+ coordinates returned. Use None to get TeX's internal units
190
+ which are likely only useful for debugging.
191
+ cache : TeXSupportCache instance, optional
192
+ Support file cache instance, defaults to the TeXSupportCache
193
+ singleton.
182
194
"""
183
195
# dispatch table
184
196
_dtable = [None ] * 256
185
197
_dispatch = partial (_dispatch , _dtable )
186
198
187
def __init__(self, filename, dpi, cache=None):
    """
    Read the data from the file named *filename* and convert
    TeX's internal units to units of *dpi* per inch.
    *dpi* only sets the units and does not limit the resolution.
    Use None to return TeX's internal units.

    Parameters
    ----------
    filename : str
        dvi file to read
    dpi : number or None
        Dots per inch, can be floating-point; this affects the
        coordinates returned.
    cache : TeXSupportCache instance, optional
        Support file cache instance; when None, the object returned by
        ``TeXSupportCache.get_cache()`` is used.

    Notes
    -----
    The file is opened here and kept open on ``self.file``. If any of
    the remaining set-up steps raises, the file is closed again before
    the exception propagates.
    """
    _log.debug('Dvi: %s', filename)
    if cache is None:
        cache = TeXSupportCache.get_cache()
    self.cache = cache
    self.file = open(filename, 'rb')
    try:
        self.dpi = dpi
        self.fonts = {}
        self.state = _dvistate.pre
        self.baseline = self._get_baseline(filename)
        # De-duplicate and sort the names so the lookup below is
        # issued once per font, in a deterministic order.
        self.fontnames = sorted(set(self._read_fonts()))
        # populate kpsewhich cache with font pathnames
        find_tex_files([x + suffix for x in self.fontnames
                        for suffix in ('.tfm', '.vf', '.pfb')],
                       cache)
        cache.optimize()
    except BaseException:
        # The caller never receives the half-built object, so nobody
        # else could close the handle; release it and re-raise.
        self.file.close()
        raise
200
221
201
222
def _get_baseline(self, filename):
    """
    Return the depth recorded in the ``.baseline`` file next to *filename*.

    The companion file is only consulted when
    ``rcParams['text.latex.preview']`` is truthy and the file exists;
    otherwise None is returned. Its contents must split into exactly
    three whitespace-separated fields (height, depth, width), of which
    only the depth is used, converted to float.
    """
    if not rcParams['text.latex.preview']:
        return None
    stem, _ext = os.path.splitext(filename)
    baseline_path = stem + ".baseline"
    if not os.path.exists(baseline_path):
        return None
    with open(baseline_path, 'rb') as stream:
        fields = stream.read().split()
    # Unpacking enforces that exactly three fields are present.
    _height, depth, _width = fields
    return float(depth)
211
232
@@ -292,6 +313,61 @@ def _output(self):
292
313
return Page (text = text , boxes = boxes , width = (maxx - minx )* d ,
293
314
height = (maxy_pure - miny )* d , descent = descent )
294
315
316
def _read_fonts(self):
    """
    Read the postamble of the file and return a list of fonts used.

    The file is scanned backwards from its end to locate the pointer
    to the postamble, then the font definitions (opcodes 243-246)
    found there are collected. The file position is reset to the start
    of the file before returning.

    Returns
    -------
    list of str
        Font names in the order they appear in the postamble;
        duplicates are not removed.

    Raises
    ------
    ValueError
        If the trailing bytes, the postamble pointer, or the postamble
        contents do not have the expected layout, or if the postamble
        ends before its terminating opcode.
    """
    file = self.file

    # Walk backwards over the trailing run of 223 bytes (looking at no
    # more than the last 99 bytes) to find the first byte that differs.
    offset = -1
    while offset > -100:
        file.seek(offset, 2)
        byte = file.read(1)[0]
        if byte != 223:
            break
        offset -= 1
    if offset >= -4:
        raise ValueError(
            "malformed dvi file %s: too few 223 bytes" % file.name)
    if byte != 2:
        raise ValueError(
            ("malformed dvi file %s: post-postamble "
             "identification byte not 2") % file.name)

    # The four bytes just before the identification byte hold the
    # big-endian absolute offset of the postamble.
    file.seek(offset - 4, 2)
    offset = struct.unpack('!I', file.read(4))[0]
    file.seek(offset, 0)
    try:
        byte = file.read(1)[0]
    except IndexError:
        raise ValueError(
            "malformed dvi file %s: postamble offset %d out of range"
            % (file.name, offset))
    if byte != 248:
        raise ValueError(
            "malformed dvi file %s: postamble not found at offset %d"
            % (file.name, offset))

    fonts = []
    # Skip the 28 bytes following the postamble opcode (presumably its
    # fixed-size parameters; none of them are needed here).
    file.seek(28, 1)
    while True:
        opcode = file.read(1)
        if not opcode:
            # Report a truncated postamble the same way as every other
            # malformed-file condition instead of leaking IndexError.
            raise ValueError(
                "malformed dvi file %s: postamble ends unexpectedly"
                % file.name)
        byte = opcode[0]
        if 243 <= byte <= 246:
            _, _, _, _, a, length = (
                _arg_olen1(self, byte - 243),
                _arg(4, False, self, None),
                _arg(4, False, self, None),
                _arg(4, False, self, None),
                _arg(1, False, self, None),
                _arg(1, False, self, None))
            raw = file.read(a + length)
            if len(raw) != a + length:
                raise ValueError(
                    "malformed dvi file %s: truncated font definition"
                    % file.name)
            # Drop the *a*-byte prefix by slicing from the front:
            # ``raw[-length:]`` would return the whole buffer when
            # *length* is 0.
            fonts.append(raw[a:].decode('ascii'))
        elif byte == 249:
            break
        else:
            raise ValueError(
                "malformed dvi file %s: opcode %d in postamble"
                % (file.name, byte))
    file.seek(0, 0)
    return fonts
370
+
295
371
def _read (self ):
296
372
"""
297
373
Read one page from the file. Return True if successful,
@@ -591,6 +667,10 @@ class Vf(Dvi):
591
667
----------
592
668
593
669
filename : string or bytestring
670
+ vf file to read
671
+ cache : TeXSupportCache instance, optional
672
+ Support file cache instance, defaults to the TeXSupportCache
673
+ singleton.
594
674
595
675
Notes
596
676
-----
@@ -601,8 +681,8 @@ class Vf(Dvi):
601
681
but replaces the `_read` loop and dispatch mechanism.
602
682
"""
603
683
604
- def __init__ (self , filename ):
605
- Dvi .__init__ (self , filename , 0 )
684
+ def __init__ (self , filename , cache = None ):
685
+ Dvi .__init__ (self , filename , dpi = 0 , cache = cache )
606
686
try :
607
687
self ._first_font = None
608
688
self ._chars = {}
@@ -613,6 +693,27 @@ def __init__(self, filename):
613
693
def __getitem__ (self , code ):
614
694
return self ._chars [code ]
615
695
696
def _read_fonts(self):
    """
    Read through the font-definition section of the vf file
    and return the list of font names.

    Scanning starts at the beginning of the file, steps over the
    preamble (opcode 247) and every font definition (opcodes 243-246),
    and stops at the first opcode outside 243-247 or at end of file.
    The file position is reset to the start of the file before
    returning.

    Returns
    -------
    list of str
        Font names in file order; duplicates are not removed.
    """
    fonts = []
    self.file.seek(0, 0)
    while True:
        data = self.file.read(1)
        if not data:
            # End of file: nothing more to scan. Stop here rather than
            # failing with IndexError on the empty read.
            break
        byte = data[0]
        if byte <= 242 or byte >= 248:
            break
        elif 243 <= byte <= 246:
            # Font number, 1-4 bytes wide depending on the opcode;
            # the value itself is not needed.
            _ = self._arg(byte - 242)
            _, _, _, a, length = [self._arg(x) for x in (4, 4, 4, 1, 1)]
            raw = self.file.read(a + length)
            # Drop the *a*-byte prefix by slicing from the front:
            # ``raw[-length:]`` would return the whole buffer when
            # *length* is 0.
            fonts.append(raw[a:].decode('ascii'))
        elif byte == 247:  # preamble
            # Two 1-byte fields, the second giving the size of the
            # block to skip, followed by two 4-byte fields.
            _, k = self._arg(1), self._arg(1)
            _ = self.file.read(k)
            _, _ = self._arg(4), self._arg(4)
    self.file.seek(0, 0)
    return fonts
716
+
616
717
def _read (self ):
617
718
"""
618
719
Read one page from the file. Return True if successful,
@@ -650,8 +751,8 @@ def _read(self):
650
751
self ._init_packet (packet_len )
651
752
elif 243 <= byte <= 246 :
652
753
k = self ._arg (byte - 242 , byte == 246 )
653
- c , s , d , a , l = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
654
- self ._fnt_def_real (k , c , s , d , a , l )
754
+ c , s , d , a , length = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
755
+ self ._fnt_def_real (k , c , s , d , a , length )
655
756
if self ._first_font is None :
656
757
self ._first_font = k
657
758
elif byte == 247 : # preamble
0 commit comments