|
From: <jo...@us...> - 2009-08-15 20:00:23
|
Revision: 7493
http://matplotlib.svn.sourceforge.net/matplotlib/?rev=7493&view=rev
Author: jouni
Date: 2009-08-15 20:00:09 +0000 (Sat, 15 Aug 2009)
Log Message:
-----------
get_sample_data improvements: remove stale files from subdirectories of the
cache directory, not only from its top level; try to handle the disconnected
(no network connection) use case; use the svnroot URL, which is perhaps more
stable than the viewvc one
Modified Paths:
--------------
trunk/matplotlib/lib/matplotlib/cbook.py
Modified: trunk/matplotlib/lib/matplotlib/cbook.py
===================================================================
--- trunk/matplotlib/lib/matplotlib/cbook.py 2009-08-15 18:37:25 UTC (rev 7492)
+++ trunk/matplotlib/lib/matplotlib/cbook.py 2009-08-15 20:00:09 UTC (rev 7493)
@@ -355,7 +355,7 @@
class ViewVCCachedServer(urllib2.BaseHandler):
"""
Urllib2 handler that takes care of caching files.
- The file cache.pck holds the directory of files to be cached.
+ The file cache.pck holds the directory of files that have been cached.
"""
def __init__(self, cache_dir, baseurl):
self.cache_dir = cache_dir
@@ -386,9 +386,14 @@
cache = cPickle.load(f)
f.close()
+ # Earlier versions did not have the full paths in cache.pck
+ for url, (fn, x, y) in cache.items():
+ if not os.path.isabs(fn):
+ cache[url] = (self.in_cache_dir(fn), x, y)
+
# If any files are deleted, drop them from the cache
for url, (fn, _, _) in cache.items():
- if not os.path.exists(self.in_cache_dir(fn)):
+ if not os.path.exists(fn):
del cache[url]
self.cache = cache
@@ -398,15 +403,21 @@
Remove files from the cache directory that are not listed in
cache.pck.
"""
- listed = set([fn for (_, (fn, _, _)) in self.cache.items()])
- for path in os.listdir(self.cache_dir):
- if path not in listed and path != 'cache.pck':
- thisfile = os.path.join(self.cache_dir, path)
- if not os.path.isdir(thisfile):
- matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%thisfile,
- level='debug')
- os.remove(thisfile)
+ # TODO: remove empty subdirectories
+ listed = set(fn for (_, (fn, _, _)) in self.cache.items())
+ existing = reduce(set.union,
+ (set(os.path.join(dirpath, fn) for fn in filenames)
+ for (dirpath, _, filenames) in os.walk(self.cache_dir)))
+ matplotlib.verbose.report(
+ 'ViewVCCachedServer: files listed in cache.pck: %s' % listed, 'debug')
+ matplotlib.verbose.report(
+ 'ViewVCCachedServer: files in cache directory: %s' % existing, 'debug')
+ for path in existing - listed - set([self.in_cache_dir('cache.pck')]):
+ matplotlib.verbose.report('ViewVCCachedServer:remove_stale_files: removing %s'%path,
+ level='debug')
+ os.remove(path)
+
def write_cache(self):
"""
Write the cache data structure into the cache directory.
@@ -424,17 +435,12 @@
fn = url[len(self.baseurl):]
fullpath = self.in_cache_dir(fn)
- #while os.path.exists(self.in_cache_dir(fn)):
- # fn = rightmost + '.' + str(random.randint(0,9999999))
-
-
-
- f = open(self.in_cache_dir(fn), 'wb')
+ f = open(fullpath, 'wb')
f.write(data)
f.close()
# Update the cache
- self.cache[url] = (fn, headers.get('ETag'), headers.get('Last-Modified'))
+ self.cache[url] = (fullpath, headers.get('ETag'), headers.get('Last-Modified'))
self.write_cache()
# These urllib2 entry points are used:
@@ -459,9 +465,9 @@
"""
url = req.get_full_url()
fn, _, _ = self.cache[url]
- cachefile = self.in_cache_dir(fn)
- matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'%cachefile)
- file = open(cachefile, 'rb')
+ matplotlib.verbose.report('ViewVCCachedServer: reading data file from cache file "%s"'
+ %fn, 'debug')
+ file = open(fn, 'rb')
handle = urllib2.addinfourl(file, hdrs, url)
handle.code = 304
return handle
@@ -470,6 +476,8 @@
"""
Update the cache with the returned file.
"""
+ matplotlib.verbose.report('ViewVCCachedServer: received response %d: %s'
+ % (response.code, response.msg), 'debug')
if response.code != 200:
return response
else:
@@ -489,11 +497,11 @@
store it in the cachedir.
If asfileobj is True, a file object will be returned. Else the
- path to the file as a string will be returned
-
+ path to the file as a string will be returned.
"""
+ # TODO: time out if the connection takes forever
+ # (may not be possible with urllib2 only - spawn a helper process?)
-
# quote is not in python2.4, so check for it and get it from
# urllib if it is not available
quote = getattr(urllib2, 'quote', None)
@@ -501,13 +509,25 @@
import urllib
quote = urllib.quote
+ # retrieve the URL for the side effect of refreshing the cache
url = self.baseurl + quote(fname)
- response = self.opener.open(url)
+ error = 'unknown error'
+ matplotlib.verbose.report('ViewVCCachedServer: retrieving %s'
+ % url, 'debug')
+ try:
+ response = self.opener.open(url)
+ except urllib2.URLError, e:
+ # could be a missing network connection
+ error = str(e)
+ cached = self.cache.get(url)
+ if cached is None:
+ msg = 'file %s not in cache; received %s when trying to retrieve' \
+ % (fname, error)
+ raise KeyError(msg)
+
+ fname = cached[0]
- relpath = self.cache[url][0]
- fname = self.in_cache_dir(relpath)
-
if asfileobj:
return file(fname)
else:
@@ -519,7 +539,7 @@
Check the cachedirectory ~/.matplotlib/sample_data for a sample_data
file. If it does not exist, fetch it with urllib from the mpl svn repo
- http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/
+ http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/
and store it in the cachedir.
@@ -539,7 +559,7 @@
if myserver is None:
configdir = matplotlib.get_configdir()
cachedir = os.path.join(configdir, 'sample_data')
- baseurl = 'http://matplotlib.svn.sourceforge.net/viewvc/matplotlib/trunk/sample_data/'
+ baseurl = 'http://matplotlib.svn.sourceforge.net/svnroot/matplotlib/trunk/sample_data/'
myserver = get_sample_data.myserver = ViewVCCachedServer(cachedir, baseurl)
return myserver.get_sample_data(fname, asfileobj=asfileobj)
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|