99import numpy as np
1010import numpy .ma as ma
1111from weakref import ref
12+ import cPickle
13+ import os .path
14+ import random
15+ import urllib2
1216
1317import matplotlib
1418
@@ -340,7 +344,119 @@ def to_filehandle(fname, flag='rU', return_opened=False):
def is_scalar_or_string(val):
    """
    Return a true value if *val* is string-like or is not iterable.

    String-likeness is checked first; the iterability check only runs
    when *val* is not string-like.
    """
    stringy = is_string_like(val)
    if stringy:
        return stringy
    return not iterable(val)
342346
class _CacheProcessor(urllib2.BaseHandler):
    """
    Urllib2 handler that takes care of caching files.

    The file cache.pck in the cache directory holds a pickled dictionary
    that maps each cached url to a tuple ``(filename, etag, lastmod)``:
    *filename* is relative to the cache directory, *etag* and *lastmod*
    are the values of the ``ETag`` and ``Last-Modified`` response
    headers (None when the server did not send them).
    """
    def __init__(self, cache_dir):
        """
        Load the cache index from *cache_dir* and delete any files in
        that directory that the index does not know about.
        """
        self.cache_dir = cache_dir
        self.read_cache()
        self.remove_stale_files()

    def in_cache_dir(self, fn):
        """
        Return the path of the file named *fn* inside the cache directory.
        """
        return os.path.join(self.cache_dir, fn)

    def read_cache(self):
        """
        Read the cache file from the cache directory.

        Sets ``self.cache`` to an empty dictionary if cache.pck does not
        exist; otherwise to the pickled dictionary minus every entry
        whose data file is no longer present.
        """
        fn = self.in_cache_dir('cache.pck')
        if not os.path.exists(fn):
            self.cache = {}
            return

        # NOTE: unpickling can execute arbitrary code, so cache.pck must
        # only ever be a file produced by write_cache.
        f = open(fn, 'rb')
        try:
            cache = cPickle.load(f)
        finally:
            # Close the handle even if the pickle is unreadable
            f.close()

        # If any files are deleted, drop them from the cache.  Iterate
        # over a copy of the items because entries are deleted in the loop.
        for url, (cached_fn, _, _) in list(cache.items()):
            if not os.path.exists(self.in_cache_dir(cached_fn)):
                del cache[url]

        self.cache = cache

    def remove_stale_files(self):
        """
        Remove files from the cache directory that are not listed in
        cache.pck.
        """
        listed = set([fn for (fn, _, _) in self.cache.values()])
        for path in os.listdir(self.cache_dir):
            if path not in listed and path != 'cache.pck':
                os.remove(os.path.join(self.cache_dir, path))

    def write_cache(self):
        """
        Write the cache data structure into the cache directory.
        """
        fn = self.in_cache_dir('cache.pck')
        f = open(fn, 'wb')
        try:
            # -1 selects the highest pickle protocol available
            cPickle.dump(self.cache, f, -1)
        finally:
            f.close()

    def cache_file(self, url, data, headers):
        """
        Store a received file in the cache directory.

        *data* is the body to write, *headers* is a mapping whose
        ``ETag`` and ``Last-Modified`` entries (if any) are recorded
        next to the file name.  The cache index is rewritten afterwards.
        """
        # Pick a filename: the last path component of the url, with a
        # random numeric suffix appended while that name is already taken
        rightmost = url.rstrip('/').split('/')[-1]
        fn = rightmost
        while os.path.exists(self.in_cache_dir(fn)):
            fn = rightmost + '.' + str(random.randint(0, 9999999))

        # Write out the data
        f = open(self.in_cache_dir(fn), 'wb')
        try:
            f.write(data)
        finally:
            f.close()

        # Update the cache
        self.cache[url] = (fn, headers.get('ETag'),
                           headers.get('Last-Modified'))
        self.write_cache()

    # These urllib2 entry points are used:
    # http_request for preprocessing requests
    # http_error_304 for handling 304 Not Modified responses
    # http_response for postprocessing requests

    def http_request(self, req):
        """
        Make the request conditional if we have a cached file.

        A validator header is only added when the corresponding value
        was recorded; the stored etag or last-modified date is None when
        the server did not send that header, and None must not be sent
        back as a header value.
        """
        url = req.get_full_url()
        if url in self.cache:
            _, etag, lastmod = self.cache[url]
            if etag is not None:
                req.add_header("If-None-Match", etag)
            if lastmod is not None:
                req.add_header("If-Modified-Since", lastmod)
        return req

    def http_error_304(self, req, fp, code, msg, hdrs):
        """
        Read the file from the cache since the server has no newer version.

        Returns a response object wrapping the open cached file; its
        code is 304 and the caller is responsible for closing it.
        """
        url = req.get_full_url()
        fn, _, _ = self.cache[url]
        cached = open(self.in_cache_dir(fn), 'rb')
        handle = urllib2.addinfourl(cached, hdrs, url)
        handle.code = 304
        return handle

    def http_response(self, req, response):
        """
        Update the cache with the returned file.

        Responses with a code other than 200 are passed through
        untouched.  For a 200 response the body is read, stored in the
        cache, and a new response object backed by an in-memory copy of
        the body is returned (the original stream has been consumed).
        """
        if response.code != 200:
            return response

        url = req.get_full_url()
        data = response.read()
        self.cache_file(url, data, response.headers)
        result = urllib2.addinfourl(StringIO.StringIO(data),
                                    response.headers,
                                    url)
        result.code = response.code
        result.msg = response.msg
        return result
344460
345461def get_mpl_data (fname , asfileobj = True ):
346462 """
@@ -363,32 +479,25 @@ def get_mpl_data(fname, asfileobj=True):
363479 intended for use in mpl examples that need custom data
364480 """
365481
366- # TODO: how to handle stale data in the cache that has been
367- # updated from svn -- is there a clean http way to get the current
368- # revision number that will not leave us at the mercy of html
369- # changes at sf?
370-
371-
372- configdir = matplotlib .get_configdir ()
373- cachedir = os .path .join (configdir , 'mpl_data' )
374- if not os .path .exists (cachedir ):
375- os .mkdir (cachedir )
376-
377- cachefile = os .path .join (cachedir , fname )
378-
379- if not os .path .exists (cachefile ):
380- import urllib
381- url = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/mpl_data/%s' % urllib .quote (fname )
382- matplotlib .verbose .report ('Attempting to download %s to %s' % (url , cachefile ))
383- urllib .urlretrieve (url , filename = cachefile )
384- else :
385- matplotlib .verbose .report ('Aleady have mpl_data %s' % fname )
482+ if not hasattr (get_mpl_data , 'opener' ):
483+ configdir = matplotlib .get_configdir ()
484+ cachedir = os .path .join (configdir , 'mpl_data' )
485+ if not os .path .exists (cachedir ):
486+ os .mkdir (cachedir )
487+ # Store the cache processor and url opener as attributes of this function
488+ get_mpl_data .processor = _CacheProcessor (cachedir )
489+ get_mpl_data .opener = urllib2 .build_opener (get_mpl_data .processor )
386490
491+ url = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/mpl_data/' + \
492+ urllib2 .quote (fname )
493+ response = get_mpl_data .opener .open (url )
387494 if asfileobj :
388- return to_filehandle ( cachefile )
495+ return response
389496 else :
390- return cachefile
391-
497+ response .close ()
498+ p = get_mpl_data .processor
499+ return p .in_cache_dir (p .cache [url ][0 ])
500+
392501def flatten (seq , scalarp = is_scalar_or_string ):
393502 """
394503 this generator flattens nested containers such as
0 commit comments