Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit cf99608

Browse files
committed
Made cbook.get_mpl_data make use of the ETag and Last-Modified headers of mod_dav_svn.
svn path=/trunk/matplotlib/; revision=7354
1 parent 0e99d22 commit cf99608

2 files changed

Lines changed: 134 additions & 23 deletions

File tree

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
2009-08-04 Made cbook.get_mpl_data make use of the ETag and Last-Modified
2+
headers of mod_dav_svn. - JKS
13

24
2009-08-03 Add PathCollection; modify contourf to use complex
35
paths instead of simple paths with cuts. - EF

lib/matplotlib/cbook.py

Lines changed: 132 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
import numpy as np
1010
import numpy.ma as ma
1111
from weakref import ref
12+
import cPickle
13+
import os.path
14+
import random
15+
import urllib2
1216

1317
import matplotlib
1418

@@ -340,7 +344,119 @@ def to_filehandle(fname, flag='rU', return_opened=False):
340344
def is_scalar_or_string(val):
341345
return is_string_like(val) or not iterable(val)
342346

347+
class _CacheProcessor(urllib2.BaseHandler):
    """
    Urllib2 handler that takes care of caching files.

    The pickle file cache.pck, stored in *cache_dir*, maps each URL to a
    tuple ``(filename, etag, last_modified)`` describing the cached copy.
    Requests for known URLs are made conditional (If-None-Match /
    If-Modified-Since); a 304 reply is served from the cache, a 200 reply
    refreshes it.
    """
    def __init__(self, cache_dir):
        self.cache_dir = cache_dir
        self.read_cache()
        self.remove_stale_files()

    def in_cache_dir(self, fn):
        # Return the full path of *fn* inside the cache directory.
        return os.path.join(self.cache_dir, fn)

    def read_cache(self):
        """
        Read the cache file from the cache directory.

        Starts with an empty cache when cache.pck is missing or
        unreadable, so a corrupt cache file cannot permanently break
        downloads.
        """
        fn = self.in_cache_dir('cache.pck')
        if not os.path.exists(fn):
            self.cache = {}
            return

        f = open(fn, 'rb')
        try:
            try:
                cache = cPickle.load(f)
            except Exception:
                # Corrupt or truncated cache file: discard it rather than
                # raising on every subsequent request.
                cache = {}
        finally:
            f.close()

        # If any files were deleted behind our back, drop their entries.
        # cache.items() gives a snapshot, so deleting while looping is safe.
        for url, (fn, _, _) in cache.items():
            if not os.path.exists(self.in_cache_dir(fn)):
                del cache[url]

        self.cache = cache

    def remove_stale_files(self):
        """
        Remove files from the cache directory that are not listed in
        cache.pck.
        """
        listed = set([fn for (_, (fn, _, _)) in self.cache.items()])
        for path in os.listdir(self.cache_dir):
            if path not in listed and path != 'cache.pck':
                os.remove(os.path.join(self.cache_dir, path))

    def write_cache(self):
        """
        Write the cache data structure into the cache directory.
        """
        fn = self.in_cache_dir('cache.pck')
        f = open(fn, 'wb')
        try:
            cPickle.dump(self.cache, f, -1)
        finally:
            f.close()

    def cache_file(self, url, data, headers):
        """
        Store a received file in the cache directory.
        """
        # Pick a filename: the last URL component, de-collided with a
        # random suffix if that name is already taken.
        rightmost = url.rstrip('/').split('/')[-1]
        fn = rightmost
        while os.path.exists(self.in_cache_dir(fn)):
            fn = rightmost + '.' + str(random.randint(0, 9999999))

        # Write out the data
        f = open(self.in_cache_dir(fn), 'wb')
        try:
            f.write(data)
        finally:
            f.close()

        # Update the cache
        self.cache[url] = (fn, headers.get('ETag'), headers.get('Last-Modified'))
        self.write_cache()

    # These urllib2 entry points are used:
    #    http_request for preprocessing requests
    #    http_error_304 for handling 304 Not Modified responses
    #    http_response for postprocessing requests

    def http_request(self, req):
        """
        Make the request conditional if we have a cached file.
        """
        url = req.get_full_url()
        if url in self.cache:
            _, etag, lastmod = self.cache[url]
            # The server may not have sent either header; only request
            # conditionally on values we actually have, otherwise we
            # would send the literal string "None".
            if etag is not None:
                req.add_header("If-None-Match", etag)
            if lastmod is not None:
                req.add_header("If-Modified-Since", lastmod)
        return req

    def http_error_304(self, req, fp, code, msg, hdrs):
        """
        Read the file from the cache since the server has no newer version.
        """
        url = req.get_full_url()
        fn, _, _ = self.cache[url]
        # Note: don't shadow the 'file' builtin here.
        cached = open(self.in_cache_dir(fn), 'rb')
        handle = urllib2.addinfourl(cached, hdrs, url)
        handle.code = 304
        return handle

    def http_response(self, req, response):
        """
        Update the cache with the returned file.
        """
        if response.code != 200:
            return response
        else:
            # Local import: this commit's header additions do not include
            # StringIO, so don't rely on a module-level import.
            import StringIO
            data = response.read()
            self.cache_file(req.get_full_url(), data, response.headers)
            result = urllib2.addinfourl(StringIO.StringIO(data),
                                        response.headers,
                                        req.get_full_url())
            result.code = response.code
            result.msg = response.msg
            return result
344460

345461
def get_mpl_data(fname, asfileobj=True):
346462
"""
@@ -363,32 +479,25 @@ def get_mpl_data(fname, asfileobj=True):
363479
intended for use in mpl examples that need custom data
364480
"""
365481

366-
# TODO: how to handle stale data in the cache that has been
367-
# updated from svn -- is there a clean http way to get the current
368-
# revision number that will not leave us at the mercy of html
369-
# changes at sf?
370-
371-
372-
configdir = matplotlib.get_configdir()
373-
cachedir = os.path.join(configdir, 'mpl_data')
374-
if not os.path.exists(cachedir):
375-
os.mkdir(cachedir)
376-
377-
cachefile = os.path.join(cachedir, fname)
378-
379-
if not os.path.exists(cachefile):
380-
import urllib
381-
url = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/mpl_data/%s'%urllib.quote(fname)
382-
matplotlib.verbose.report('Attempting to download %s to %s'%(url, cachefile))
383-
urllib.urlretrieve(url, filename=cachefile)
384-
else:
385-
matplotlib.verbose.report('Aleady have mpl_data %s'%fname)
482+
if not hasattr(get_mpl_data, 'opener'):
483+
configdir = matplotlib.get_configdir()
484+
cachedir = os.path.join(configdir, 'mpl_data')
485+
if not os.path.exists(cachedir):
486+
os.mkdir(cachedir)
487+
# Store the cache processor and url opener as attributes of this function
488+
get_mpl_data.processor = _CacheProcessor(cachedir)
489+
get_mpl_data.opener = urllib2.build_opener(get_mpl_data.processor)
386490

491+
url = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/mpl_data/' + \
492+
urllib2.quote(fname)
493+
response = get_mpl_data.opener.open(url)
387494
if asfileobj:
388-
return to_filehandle(cachefile)
495+
return response
389496
else:
390-
return cachefile
391-
497+
response.close()
498+
p = get_mpl_data.processor
499+
return p.in_cache_dir(p.cache[url][0])
500+
392501
def flatten(seq, scalarp=is_scalar_or_string):
393502
"""
394503
this generator flattens nested containers such as

0 commit comments

Comments
 (0)