Update of /cvsroot/pygccxml/source/pygccxml/parser
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv23339
Modified Files:
__init__.py
Added Files:
directory_cache.py
Log Message:
Added a new cache class: directory_cache_t
--- NEW FILE: directory_cache.py ---
# Copyright 2004 Roman Yakovenko.
# Distributed under the Boost Software License, Version 1.0. (See
# accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#
# The initial version of the directory_cache_t class was written
# by Matthias Baas (ba...@ir...).
import os, os.path, gzip, md5
import cPickle
import declarations_cache

class index_entry_t:
    """Entry of the index table in the directory cache index.

    Each cached header file (i.e. each *.cache file) has a corresponding
    index_entry_t object. This object is used to determine whether the
    cache file with the declarations is still valid or not.

    This class is a helper class for the directory_cache_t class.
    """

    def __init__( self, filesigs, configsig ):
        """Constructor.

        filesigs is a list of tuples (fileid, sig)...
        configsig is the signature of the configuration object.
        """
        self.filesigs = filesigs
        self.configsig = configsig

    def __getstate__(self):
        return (self.filesigs, self.configsig)

    def __setstate__(self, state):
        self.filesigs, self.configsig = state

class directory_cache_t ( declarations_cache.cache_base_t ):
    """Cache class that stores its data as multiple files inside a directory.

    The cache stores one index file called "index.dat" which is always
    read when the cache object is created. Each header file gets a
    corresponding *.cache file that stores the declarations found in the
    header. The index file is used to determine whether a *.cache file
    is still valid, by checking whether any of the dependent files (i.e.
    the header file itself and all files it includes) has been modified
    since the last run.
    """
    def __init__( self, dir="cache", compression=False, md5_sigs=True ):
        """Constructor.

        dir is the cache directory (it is created if it does not exist).
        If compression is set to True the cache files will be compressed
        using gzip.
        md5_sigs determines whether file modifications are detected by
        computing a md5 digest of the file contents or by checking the
        modification date.
        """
        declarations_cache.cache_base_t.__init__(self)

        # Cache directory
        self.__dir = os.path.abspath(dir)
        # Flag that determines whether the cache files will be compressed
        self.__compression = compression
        # Flag that determines whether the signature is a md5 digest or
        # the modification time
        # (this flag is passed on to the filename_repository_t instance)
        self.__md5_sigs = md5_sigs
        # Filename repository
        self.__filename_rep = filename_repository_t(self.__md5_sigs)
        # Index dictionary (key: the value returned by _create_cache_key(),
        # which is based on the header file name / value: an
        # index_entry_t object)
        self.__index = {}
        # Flag that indicates whether the index was modified
        self.__modified_flag = False

        # Check if dir refers to an existing file...
        if os.path.isfile(self.__dir):
            raise ValueError, "Cannot use %s as cache directory. There is already a file with that name."%self.__dir

        # Load the cache or create the cache directory...
        if os.path.isdir(self.__dir):
            self._load()
        else:
            # Create the cache directory...
            os.mkdir(self.__dir)

    def flush(self):
        """Save the index table to disk."""
        self._save()
        # self.__filename_rep._dump()

    def update(self, source_file, configuration, declarations, included_files):
        """Replace a cache entry by a new value.
        """
        # Normalize all paths...
        source_file = os.path.normpath(source_file)
        included_files = map(lambda p: os.path.normpath(p), included_files)

        # Create the list of dependent files. This is the included_files list
        # plus the source file itself. Duplicate names are removed.
        dependent_files = {}
        for name in [source_file]+included_files:
            dependent_files[name] = 1
        dependent_files = dependent_files.keys()

        key = self._create_cache_key(source_file)
        # Remove an existing entry (if there is one).
        # After calling this method, it is guaranteed that __index[key]
        # does not exist anymore.
        self._remove_entry(source_file, key)

        # Create a new entry...

        # Create the sigs of all dependent files...
        filesigs = []
        for filename in dependent_files:
            id,sig = self.__filename_rep.acquire_filename(filename)
            filesigs.append((id,sig))

        configsig = self._create_config_signature(configuration)
        entry = index_entry_t(filesigs, configsig)
        self.__index[key] = entry
        self.__modified_flag = True

        # Write the declarations into the cache file...
        cachefilename = self._create_cache_filename(source_file)
        self._write_file(cachefilename, declarations)

    def cached_value(self, source_file, configuration):
        """Return the cached declarations or None.
        """
        # Check if the cache contains an entry for source_file
        key = self._create_cache_key(source_file)
        entry = self.__index.get(key)
        if entry==None:
            # print "CACHE: %s: Not cached"%source_file
            return None

        # Check if the entry is still valid. It is not valid if:
        # - the source_file has been updated
        # - the configuration object has changed (i.e. the header is parsed
        #   by gccxml with different settings which may influence the
        #   declarations)
        # - the included files have been updated
        #   (this list is part of the cache entry as it cannot be known
        #   by the caller when cached_value() is called. It was instead
        #   passed to update())

        # Check if the config is different...
        configsig = self._create_config_signature(configuration)
        if configsig!=entry.configsig:
            # print "CACHE: %s: Config mismatch"%source_file
            return None

        # Check if any of the dependent files has been modified...
        for id, sig in entry.filesigs:
            if self.__filename_rep.is_file_modified(id, sig):
                # print "CACHE: %s: Entry not up to date"%source_file
                return None

        # Load and return the cached declarations
        cachefilename = self._create_cache_filename(source_file)
        decls = self._read_file(cachefilename)
        # print "CACHE: Using cached decls for",source_file
        return decls

    def _load(self):
        """Load the cache.

        This method is called in the constructor.
        """
        indexfilename = os.path.join(self.__dir, "index.dat")
        if os.path.exists(indexfilename):
            data = self._read_file(indexfilename)
            self.__index = data[0]
            self.__filename_rep = data[1]
            if self.__filename_rep._md5_sigs!=self.__md5_sigs:
                print "CACHE: Warning: md5_sigs stored in the cache is set to %s."%self.__filename_rep._md5_sigs
                print "       Please remove the cache to change this setting."
                self.__md5_sigs = self.__filename_rep._md5_sigs
        else:
            self.__index = {}
            self.__filename_rep = filename_repository_t(self.__md5_sigs)
        self.__modified_flag = False

    def _save(self):
        """Save the cache index if it was modified.
        """
        if self.__modified_flag:
            self.__filename_rep.update_id_counter()
            indexfilename = os.path.join(self.__dir, "index.dat")
            self._write_file(indexfilename, (self.__index, self.__filename_rep))
            self.__modified_flag = False

    def _read_file(self, filename):
        """Read a Python object from a cache file.

        Reads a pickled object from disk and returns it.
        """
        if self.__compression:
            f = gzip.GzipFile(filename, "rb")
        else:
            f = file(filename, "rb")
        res = cPickle.load(f)
        f.close()
        return res

    def _write_file(self, filename, data):
        """Write a data item into a file.

        The data object is written to a file using the pickle mechanism.
        """
        if self.__compression:
            f = gzip.GzipFile(filename, "wb")
        else:
            f = file(filename, "wb")
        cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
        f.close()

    def _remove_entry(self, source_file, key):
        """Remove an entry from the cache.

        source_file is the name of the header and key is its corresponding
        cache key (obtained by a call to _create_cache_key()).
        The entry is removed from the index table, any referenced file
        name is released and the cache file is deleted.
        If key references a non-existing entry, the method returns
        immediately.
        """
        entry = self.__index.get(key)
        if entry==None:
            return

        # Release the referenced files...
        for id, sig in entry.filesigs:
            self.__filename_rep.release_filename(id)

        # Remove the cache entry...
        del self.__index[key]
        self.__modified_flag = True

        # Delete the corresponding cache file...
        cachefilename = self._create_cache_filename(source_file)
        try:
            os.remove(cachefilename)
        except OSError, e:
            print "Could not remove cache file (%s)"%e

    def _create_cache_key(self, source_file):
        """Return the cache key for a header file."""
        path, name = os.path.split(source_file)
        return name+str(hash(path))

    def _create_cache_filename(self, source_file):
        """Return the cache file name for a header file.
        """
        res = self._create_cache_key(source_file)+".cache"
        return os.path.join(self.__dir, res)

    def _create_config_signature(self, config):
        """Return the signature for a config object.
        """
        m = md5.new()
        m.update(config.working_directory)
        map(lambda p: m.update(p), config.include_paths)
        map(lambda p: m.update(p), config.define_symbols)
        map(lambda p: m.update(p), config.undefine_symbols)
        return m.digest()

class filename_entry_t:
    """This is a record stored in the filename_repository_t class.

    The class is an internal helper used in the implementation of
    filename_repository_t; it just serves as a container for the
    file name and the reference count.
    """

    def __init__( self, filename ):
        """Constructor.

        The reference count is initially set to 0.
        """
        # Filename
        self.filename = filename
        # Reference count
        self.refcount = 0
        # Cached signature value for the file.
        # If the sig_valid flag is False, the signature still has to be
        # computed, otherwise the cached value can be used.
        # These attributes must not be pickled!
        self.sig_valid = False
        self.signature = None

    def __getstate__(self):
        # Only pickle filename and refcount
        return (self.filename, self.refcount)

    def __setstate__(self, state):
        self.filename, self.refcount = state
        self.sig_valid = False
        self.signature = None

    def inc_ref_count(self):
        """Increase the reference count by 1."""
        self.refcount += 1

    def dec_ref_count(self):
        """Decrease the reference count by 1 and return the new count."""
        self.refcount -= 1
        return self.refcount

class filename_repository_t:
    """File name repository.

    This class stores file names and can check whether a file has been
    modified since a previous call.
    A file name is stored by calling acquire_filename() which returns
    an ID and a signature of the file. The signature can later be used
    to check if the file was modified by calling is_file_modified().
    If the file name is no longer required, release_filename() should be
    called so that the entry can be removed from the repository.
    """
    def __init__( self, md5_sigs ):
        """Constructor.
        """
        # Flag that determines whether the signature is a md5 digest or
        # the modification time
        self._md5_sigs = md5_sigs
        # ID lookup table (key: filename / value: id)
        self.__id_lut = {}
        # Entry dictionary (key: id / value: filename_entry_t)
        # This dictionary contains the actual data.
        # It must always hold that each entry in __entries has a
        # corresponding entry in __id_lut (i.e. the keys in __id_lut must
        # be the names stored in __entries).
        self.__entries = {}
        # A counter for new ids
        self.__next_id = 1

    def acquire_filename(self, name):
        """Acquire a file name and return its id and its signature.
        """
        id = self.__id_lut.get(name)
        # Is this a new entry?
        if id==None:
            # then create one...
            id = self.__next_id
            self.__next_id += 1
            self.__id_lut[name] = id
            entry = filename_entry_t(name)
            self.__entries[id] = entry
        else:
            # otherwise obtain the existing entry...
            entry = self.__entries[id]

        entry.inc_ref_count()
        return id, self._get_signature(entry)

    def release_filename(self, id):
        """Release a file name.
        """
        entry = self.__entries.get(id)
        if entry==None:
            raise ValueError, "Invalid filename id (%d)"%id

        # Decrease reference count and check if the entry has to be removed...
        if entry.dec_ref_count()==0:
            del self.__entries[id]
            del self.__id_lut[entry.filename]

    def is_file_modified(self, id, signature):
        """Check if the file referred to by id has been modified.
        """
        entry = self.__entries.get(id)
        if entry==None:
            raise ValueError, "Invalid filename id (%d)"%id

        # Is the signature already known?
        if entry.sig_valid:
            # use the cached signature
            filesig = entry.signature
        else:
            # compute the signature and store it
            filesig = self._get_signature(entry)
            entry.signature = filesig
            entry.sig_valid = True

        return filesig!=signature

    def update_id_counter(self):
        """Update the id counter so that it doesn't grow forever.
        """
        if len(self.__entries)==0:
            self.__next_id = 1
        else:
            self.__next_id = max(self.__entries.keys())+1

    def _get_signature(self, entry):
        """Return the signature of the file stored in entry.
        """
        if self._md5_sigs:
            # return the md5 digest of the file content...
            if not os.path.exists(entry.filename):
                return None
            try:
                f = file(entry.filename)
            except IOError, e:
                print "Cannot determine md5 digest:",e
                return None
            data = f.read()
            f.close()
            return md5.new(data).digest()
        else:
            # return the file modification date...
            try:
                return os.path.getmtime(entry.filename)
            except OSError, e:
                return None

    def _dump(self):
        """Dump contents for debugging/testing.
        """
        print 70*"-"
        print "ID lookup table:"
        for name in self.__id_lut:
            id = self.__id_lut[name]
            print "  %s -> %d"%(name, id)
        print 70*"-"
        print "%-4s %-60s %s"%("ID", "Filename", "Refcount")
        print 70*"-"
        for id in self.__entries:
            entry = self.__entries[id]
            print "%04d %-60s %d"%(id, entry.filename, entry.refcount)
Index: __init__.py
===================================================================
RCS file: /cvsroot/pygccxml/source/pygccxml/parser/__init__.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** __init__.py 2 Mar 2006 05:53:14 -0000 1.17
--- __init__.py 6 Mar 2006 14:31:41 -0000 1.18
***************
*** 19,22 ****
--- 19,23 ----
from declarations_cache import file_cache_t
from declarations_cache import dummy_cache_t
+ from directory_cache import directory_cache_t
#shortcut
CONTENT_TYPE = file_configuration_t.CONTENT_TYPE
***************
*** 42,44 ****
def parse_xml_file( content, config=None ):
parser = source_reader_t( config )
! return parser.read_xml_file( content )
\ No newline at end of file
--- 43,45 ----
def parse_xml_file( content, config=None ):
parser = source_reader_t( config )
! return parser.read_xml_file( content )