@@ -171,43 +171,64 @@ def wrapper(self, byte):
171
171
class Dvi (object ):
172
172
"""
173
173
A reader for a dvi ("device-independent") file, as produced by TeX.
174
- The current implementation can only iterate through pages in order,
175
- and does not even attempt to verify the postamble.
174
+ The current implementation can only iterate through pages in order.
176
175
177
176
This class can be used as a context manager to close the underlying
178
177
file upon exit. Pages can be read via iteration. Here is an overly
179
178
simple way to extract text without trying to detect whitespace::
180
179
181
180
>>> with matplotlib.dviread.Dvi('input.dvi', 72) as dvi:
182
181
>>> for page in dvi:
183
- >>> print(''.join(unichr(t.glyph) for t in page.text))
182
+ >>> print(''.join(chr(t.glyph) for t in page.text))
183
+
184
+ Parameters
185
+ ----------
186
+
187
+ filename : str
188
+ dvi file to read
189
+ dpi : number or None
190
+ Dots per inch, can be floating-point; this affects the
191
+ coordinates returned. Use None to get TeX's internal units
192
+ which are likely only useful for debugging.
193
+ cache : TeXSupportCache instance, optional
194
+ Support file cache instance, defaults to the TeXSupportCache
195
+ singleton.
184
196
"""
185
197
# dispatch table
186
198
_dtable = [None ] * 256
187
199
_dispatch = partial (_dispatch , _dtable )
188
200
189
def __init__(self, filename, dpi, cache=None):
    """
    Read the data from the file named *filename* and convert
    TeX's internal units to units of *dpi* per inch.
    *dpi* only sets the units and does not limit the resolution.
    Use None to return TeX's internal units.

    *cache* is the support-file cache to use; when it is None the
    object returned by ``TeXSupportCache.get_cache()`` is used.

    The file is opened in binary mode and kept open on the instance.
    If any later initialisation step raises (for example because the
    postamble is malformed), the file is closed again before the
    exception propagates, so a failed construction does not leak the
    handle.
    """
    _log.debug('Dvi: %s', filename)
    if cache is None:
        cache = TeXSupportCache.get_cache()
    self.cache = cache
    self.file = open(filename, 'rb')
    try:
        self.dpi = dpi
        self.fonts = {}
        self.state = _dvistate.pre
        self.baseline = self._get_baseline(filename)
        # De-duplicate and sort so the lookup below is deterministic.
        self.fontnames = sorted(set(self._read_fonts()))
        # populate kpsewhich cache with font pathnames
        find_tex_files([x + suffix for x in self.fontnames
                        for suffix in ('.tfm', '.vf', '.pfb')],
                       cache)
        cache.optimize()
    except BaseException:
        # Nobody else holds a reference to the half-built object's file,
        # so release it here and let the original error continue.
        self.file.close()
        raise
202
223
203
224
def _get_baseline(self, filename):
    """
    Return the depth stored in the ``.baseline`` file that sits next
    to *filename*, as a float.

    The lookup only happens when ``rcParams['text.latex.preview']`` is
    true; the companion file is expected to hold three
    whitespace-separated fields (height, depth, width).  None is
    returned when the option is off or the companion file is absent.
    """
    if not rcParams['text.latex.preview']:
        return None
    stem = os.path.splitext(filename)[0]
    baseline_path = stem + ".baseline"
    if not os.path.exists(baseline_path):
        return None
    with open(baseline_path, 'rb') as stream:
        fields = stream.read().split()
    # Exactly three fields are required; only the middle one is used.
    _, depth, _ = fields
    return float(depth)
213
234
@@ -294,6 +315,61 @@ def _output(self):
294
315
return Page (text = text , boxes = boxes , width = (maxx - minx )* d ,
295
316
height = (maxy_pure - miny )* d , descent = descent )
296
317
318
def _read_fonts(self):
    """Read the postamble of the file and return a list of fonts used.

    The file is inspected from its end: the trailing padding bytes
    (223) are skipped, the identification byte (2) is checked, and the
    4-byte big-endian pointer stored just before it is followed to the
    postamble (opcode 248).  Each font definition found there (opcodes
    243-246) contributes one name, in file order and without removing
    duplicates, until the post-postamble opcode (249) is reached.

    Returns
    -------
    list of str
        Font names with the area (directory) prefix stripped.

    Raises
    ------
    ValueError
        If the file is too short, truncated, or otherwise does not
        have the expected structure.

    On success the file position is reset to the start of the file.
    """
    stream = self.file

    def malformed(detail):
        return ValueError(
            "malformed dvi file %s: %s" % (stream.name, detail))

    # Look at no more than the last 99 bytes for the padding, reading
    # them in one go rather than seeking backwards byte by byte.
    stream.seek(0, 2)
    size = stream.tell()
    window = min(size, 99)
    stream.seek(size - window, 0)
    tail = stream.read(window)
    padding = len(tail) - len(tail.rstrip(b'\xdf'))
    if padding < 4:
        raise malformed("too few 223 bytes")
    if padding == len(tail) or tail[-padding - 1] != 2:
        raise malformed("post-postamble identification byte not 2")

    # The postamble pointer occupies the four bytes before the
    # identification byte.
    id_pos = size - padding - 1
    if id_pos < 4:
        raise malformed("post-postamble pointer missing")
    stream.seek(id_pos - 4, 0)
    offset = struct.unpack('!I', stream.read(4))[0]
    stream.seek(offset, 0)
    opcode = stream.read(1)
    if not opcode:
        raise malformed("postamble offset %d out of range" % offset)
    if opcode[0] != 248:
        raise malformed("postamble not found at offset %d" % offset)

    fonts = []
    # Skip the fixed-size postamble parameters that follow the opcode.
    stream.seek(28, 1)
    while True:
        opcode = stream.read(1)
        if not opcode:
            raise malformed("postamble truncated")
        byte = opcode[0]
        if byte == 249:
            break
        if not 243 <= byte <= 246:
            raise malformed("opcode %d in postamble" % byte)
        # Font definition: a font number of 1-4 bytes (by opcode),
        # three 4-byte fields, then the area and name lengths.
        head_len = (byte - 242) + 4 + 4 + 4 + 1 + 1
        head = stream.read(head_len)
        if len(head) < head_len:
            raise malformed("postamble truncated")
        area_len, name_len = head[-2], head[-1]
        raw = stream.read(area_len + name_len)
        if len(raw) < area_len + name_len:
            raise malformed("postamble truncated")
        # Slice from the front: raw[-name_len:] would return the whole
        # string, area included, when name_len is zero.
        fonts.append(raw[area_len:].decode('ascii'))
    stream.seek(0, 0)
    return fonts
372
+
297
373
def _read (self ):
298
374
"""
299
375
Read one page from the file. Return True if successful,
@@ -593,6 +669,10 @@ class Vf(Dvi):
593
669
----------
594
670
595
671
filename : string or bytestring
672
+ vf file to read
673
+ cache : TeXSupportCache instance, optional
674
+ Support file cache instance, defaults to the TeXSupportCache
675
+ singleton.
596
676
597
677
Notes
598
678
-----
@@ -603,8 +683,8 @@ class Vf(Dvi):
603
683
but replaces the `_read` loop and dispatch mechanism.
604
684
"""
605
685
606
- def __init__ (self , filename ):
607
- Dvi .__init__ (self , filename , 0 )
686
+ def __init__ (self , filename , cache = None ):
687
+ Dvi .__init__ (self , filename , dpi = 0 , cache = cache )
608
688
try :
609
689
self ._first_font = None
610
690
self ._chars = {}
@@ -615,6 +695,27 @@ def __init__(self, filename):
615
695
def __getitem__(self, code):
    """Return the entry stored under *code* in ``self._chars``.

    Raises KeyError when *code* has no entry.
    """
    chars = self._chars
    return chars[code]
617
697
698
def _read_fonts(self):
    """Read through the font-definition section of the vf file
    and return the list of font names.

    Scanning starts at the beginning of the file.  A preamble record
    (opcode 247) is skipped, every font definition (opcodes 243-246)
    contributes one name, and the first byte outside the range 243-247
    ends the scan.  Reaching the end of the file at a record boundary
    also ends the scan.

    Returns
    -------
    list of str
        Font names in file order, duplicates included, with the area
        (directory) prefix stripped.

    Raises
    ------
    ValueError
        If the file ends in the middle of a record.

    On success the file position is reset to the start of the file.
    """
    stream = self.file

    def read_exactly(count):
        data = stream.read(count)
        if len(data) < count:
            raise ValueError(
                "malformed vf file %s: truncated record" % stream.name)
        return data

    fonts = []
    stream.seek(0, 0)
    while True:
        opcode = stream.read(1)
        if not opcode:
            # Nothing left to scan; report what was found so far.
            break
        byte = opcode[0]
        if byte <= 242 or byte >= 248:
            break
        if byte == 247:
            # Preamble: one byte, a length byte k, k bytes, and then
            # two 4-byte fields; none of it is needed here.
            k = read_exactly(2)[1]
            read_exactly(k + 4 + 4)
        else:
            # Font definition: a font number of 1-4 bytes (by opcode),
            # three 4-byte fields, then the area and name lengths.
            head = read_exactly((byte - 242) + 4 + 4 + 4 + 1 + 1)
            area_len, name_len = head[-2], head[-1]
            raw = read_exactly(area_len + name_len)
            # Slice from the front: raw[-name_len:] would return the
            # whole string, area included, when name_len is zero.
            fonts.append(raw[area_len:].decode('ascii'))
    stream.seek(0, 0)
    return fonts
718
+
618
719
def _read (self ):
619
720
"""
620
721
Read one page from the file. Return True if successful,
@@ -652,8 +753,8 @@ def _read(self):
652
753
self ._init_packet (packet_len )
653
754
elif 243 <= byte <= 246 :
654
755
k = self ._arg (byte - 242 , byte == 246 )
655
- c , s , d , a , l = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
656
- self ._fnt_def_real (k , c , s , d , a , l )
756
+ c , s , d , a , length = [self ._arg (x ) for x in (4 , 4 , 4 , 1 , 1 )]
757
+ self ._fnt_def_real (k , c , s , d , a , length )
657
758
if self ._first_font is None :
658
759
self ._first_font = k
659
760
elif byte == 247 : # preamble
0 commit comments