[pygccxml-commit] SF.net SVN: pygccxml: [1013] pyplusplus_dev
Brought to you by:
mbaas,
roman_yakovenko
|
From: <rom...@us...> - 2007-04-22 08:41:55
|
Revision: 1013
http://svn.sourceforge.net/pygccxml/?rev=1013&view=rev
Author: roman_yakovenko
Date: 2007-04-22 01:41:55 -0700 (Sun, 22 Apr 2007)
Log Message:
-----------
Performance improvement: Py++ now stores the md5 sum of every generated file and compares against it the next time it generates code. This saves the need to load all generated files from disk.
Modified Paths:
--------------
pyplusplus_dev/pyplusplus/file_writers/__init__.py
pyplusplus_dev/pyplusplus/file_writers/class_multiple_files.py
pyplusplus_dev/pyplusplus/file_writers/multiple_files.py
pyplusplus_dev/pyplusplus/file_writers/writer.py
pyplusplus_dev/pyplusplus/module_builder/builder.py
pyplusplus_dev/unittests/algorithms_tester.py
Added Paths:
-----------
pyplusplus_dev/pyplusplus/file_writers/md5sum_repository.py
Modified: pyplusplus_dev/pyplusplus/file_writers/__init__.py
===================================================================
--- pyplusplus_dev/pyplusplus/file_writers/__init__.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/pyplusplus/file_writers/__init__.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -21,6 +21,8 @@
from single_file import single_file_t
from multiple_files import multiple_files_t
from class_multiple_files import class_multiple_files_t
+from md5sum_repository import repository_t
+from md5sum_repository import cached_repository_t
def has_pypp_extenstion( fname ):
"""returns True if file has Py++ specific extension, otherwise False"""
@@ -37,14 +39,14 @@
sf = single_file_t( data, file_path )
sf.write()
-def write_multiple_files( extmodule, dir_path ):
+def write_multiple_files( extmodule, dir_path, files_sum_repository=None ):
     """writes extmodule to multiple files"""
-    mfs = multiple_files_t( extmodule, dir_path )
+    # files_sum_repository is optional and has to be passed by keyword only:
+    # the third positional parameter of multiple_files_t.__init__ is write_main.
+    mfs = multiple_files_t( extmodule, dir_path, files_sum_repository=files_sum_repository )
     mfs.write()
     return mfs.written_files
-def write_class_multiple_files( extmodule, dir_path, huge_classes ):
+def write_class_multiple_files( extmodule, dir_path, huge_classes, files_sum_repository=None ):
     """writes extmodule to multiple files and splits huge classes to few source files"""
-    mfs = class_multiple_files_t( extmodule, dir_path, huge_classes )
+    # files_sum_repository defaults to None, so callers written against the
+    # previous three-argument signature keep working.
+    mfs = class_multiple_files_t( extmodule, dir_path, huge_classes, files_sum_repository=files_sum_repository )
     mfs.write()
     return mfs.written_files
Modified: pyplusplus_dev/pyplusplus/file_writers/class_multiple_files.py
===================================================================
--- pyplusplus_dev/pyplusplus/file_writers/class_multiple_files.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/pyplusplus/file_writers/class_multiple_files.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -29,8 +29,16 @@
alias + _main h/cpp this class will contain main registration function.
"""
- def __init__(self, extmodule, directory_path, huge_classes, num_of_functions_per_file=20):
- multiple_files.multiple_files_t.__init__(self, extmodule, directory_path)
+ def __init__( self
+ , extmodule
+ , directory_path
+ , huge_classes
+ , num_of_functions_per_file=20
+ , files_sum_repository=None ):
+ multiple_files.multiple_files_t.__init__(self
+ , extmodule
+ , directory_path
+ , files_sum_repository=files_sum_repository)
self.huge_classes = huge_classes
self.num_of_functions_per_file = num_of_functions_per_file
self.internal_splitters = [
Added: pyplusplus_dev/pyplusplus/file_writers/md5sum_repository.py
===================================================================
--- pyplusplus_dev/pyplusplus/file_writers/md5sum_repository.py (rev 0)
+++ pyplusplus_dev/pyplusplus/file_writers/md5sum_repository.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -0,0 +1,97 @@
+# Copyright 2004 Roman Yakovenko.
+# Distributed under the Boost Software License, Version 1.0. (See
+# accompanying file LICENSE_1_0.txt or copy at
+# http://www.boost.org/LICENSE_1_0.txt)
+
+"""defines interface for repository of generated files hash"""
+
+import os
+try:
+ from hashlib import md5
+except:
+ from md5 import new as md5
+
+
+def get_md5_text_value( text ):
+    """returns hexadecimal md5 digest of the text"""
+    return md5( text ).hexdigest()
+
+def get_md5_file_value( fpath ):
+    """returns hexadecimal md5 digest of the file content
+
+    @param fpath: path to the file
+    @return: digest string, or None if the file does not exist
+    """
+    if not os.path.exists( fpath ):
+        return None #file does not exist
+    f = open( fpath, 'rb' )
+    try:
+        #the handle is released even if reading fails
+        fcontent = f.read()
+    finally:
+        f.close()
+    return get_md5_text_value( fcontent )
+
+class repository_t( object ):
+    """interface of the repository of generated files hash values
+
+    Every method, except the constructor, raises NotImplementedError and has
+    to be overridden by a derived class.
+    """
+    def __init__( self ):
+        object.__init__( self )
+
+    def get_file_value( self, fpath ):
+        """returns hash value of the file fpath"""
+        raise NotImplementedError( self.__class__.__name__ )
+
+    def get_text_value( self, text ):
+        """returns hash value of the text"""
+        raise NotImplementedError( self.__class__.__name__ )
+
+    def update_value( self, fpath, hash_value ):
+        """registers hash_value as the hash value of the file fpath"""
+        raise NotImplementedError( self.__class__.__name__ )
+
+    def save_values( self ):
+        """saves the registered hash values"""
+        raise NotImplementedError( self.__class__.__name__ )
+
+class dummy_repository_t( repository_t ):
+    """repository without storage - every hash value is computed on request"""
+    def __init__( self ):
+        repository_t.__init__( self )
+
+    def get_file_value( self, fpath ):
+        """returns md5 digest of the content of the file fpath"""
+        file_digest = get_md5_file_value( fpath )
+        return file_digest
+
+    def get_text_value( self, text ):
+        """returns md5 digest of the text"""
+        text_digest = get_md5_text_value( text )
+        return text_digest
+
+    def update_value( self, fpath, hash_value ):
+        """nothing to update - hash values are not stored"""
+        return None
+
+    def save_values( self ):
+        """nothing to save - hash values are not stored"""
+        return None
+
+class cached_repository_t( repository_t ):
+    """repository, which keeps md5 digests of the files in a text file
+
+    Every line of the text file has the following format:
+        <hexdigest><separator><file name>
+    """
+    separator = ' '
+    hexdigest_len = 32
+    #derived, so it cannot get out of sync with the two values above
+    hexdigest_separator_len = hexdigest_len + len( separator )
+
+    def __init__( self, file_name ):
+        """
+        @param file_name: path to the file the digests are loaded from and
+                          saved to. The file is not required to exist.
+        """
+        repository_t.__init__( self )
+        self.__repository = {}
+        self.__repository_file = file_name
+        if os.path.exists( self.__repository_file ):
+            f = open( self.__repository_file, 'r' )
+            try:
+                for line in f:
+                    if len(line) < self.hexdigest_separator_len:
+                        continue #too short to contain digest and separator
+                    hexdigest = line[:self.hexdigest_len]
+                    #only the line terminator is removed; '\r' is listed too,
+                    #because older files could be written with os.linesep
+                    fname = line[self.hexdigest_separator_len:].rstrip( '\r\n' )
+                    if fname:
+                        self.__repository[ fname ] = hexdigest
+            finally:
+                f.close()
+
+    def get_file_value( self, fpath ):
+        """returns the digest registered for fpath, None if there is no such"""
+        return self.__repository.get( fpath )
+
+    def get_text_value( self, text ):
+        """returns md5 digest of the text"""
+        return get_md5_text_value( text )
+
+    def update_value( self, fpath, hash_value ):
+        """registers hash_value as the digest of fpath; nothing is written to disk"""
+        self.__repository[ fpath ] = hash_value
+
+    def save_values( self ):
+        """writes all registered digests to the repository file, replacing its content"""
+        #'\n' and not os.linesep: the file is opened in text mode, which
+        #already translates the line terminator to the native one
+        lines = [ '%s%s%s\n' % ( hexdigest, self.separator, fpath )
+                  for fpath, hexdigest in self.__repository.items() ]
+        f = open( self.__repository_file, 'w+' )
+        try:
+            f.writelines( lines )
+        finally:
+            f.close()
Modified: pyplusplus_dev/pyplusplus/file_writers/multiple_files.py
===================================================================
--- pyplusplus_dev/pyplusplus/file_writers/multiple_files.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/pyplusplus/file_writers/multiple_files.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -22,7 +22,7 @@
HEADER_EXT = '.pypp.hpp'
SOURCE_EXT = '.pypp.cpp'
- def __init__(self, extmodule, directory_path, write_main=True):
+ def __init__(self, extmodule, directory_path, write_main=True, files_sum_repository=None):
"""Constructor.
@param extmodule: The root of a code creator tree
@@ -34,7 +34,7 @@
that calls all the registration methods.
@type write_main: boolean
"""
- writer.writer_t.__init__(self, extmodule)
+ writer.writer_t.__init__( self, extmodule, files_sum_repository )
self.__directory_path = directory_path
self.create_dir( directory_path )
self.include_creators = [] # List of include_t creators that contain the generated headers
@@ -49,7 +49,7 @@
def write_file( self, fpath, content ):
self.written_files.append( fpath )
- writer.writer_t.write_file( fpath, content )
+ writer.writer_t.write_file( fpath, content, self.files_sum_repository )
def create_dir( self, directory_path ):
"""Create the output directory if it doesn't already exist.
@@ -385,3 +385,4 @@
, self.include_creators )
main_cpp = os.path.join( self.directory_path, self.extmodule.body.name + '.main.cpp' )
self.write_file( main_cpp, self.extmodule.create() + os.linesep )
+ self.files_sum_repository.save_values()
Modified: pyplusplus_dev/pyplusplus/file_writers/writer.py
===================================================================
--- pyplusplus_dev/pyplusplus/file_writers/writer.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/pyplusplus/file_writers/writer.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -10,6 +10,7 @@
from pyplusplus import _logging_
from pyplusplus import code_creators
from pyplusplus import code_repository
+import md5sum_repository
class writer_t(object):
"""Base class for all module/code writers.
@@ -21,17 +22,22 @@
"""
logger = _logging_.loggers.file_writer
- def __init__(self, extmodule):
+ def __init__(self, extmodule, files_sum_repository=None):
object.__init__(self)
self.__extmodule = extmodule
-
-
- def _get_extmodule(self):
+ self.__files_sum_repository = files_sum_repository
+ if None is files_sum_repository:
+ self.__files_sum_repository = md5sum_repository.dummy_repository_t()
+
+ @property
+ def extmodule(self):
+ """The root of the code creator tree ( code_creators.module_t )"""
return self.__extmodule
- extmodule = property( _get_extmodule,
- doc="""The root of the code creator tree.
- @type: module_t""")
-
+
+ @property
+ def files_sum_repository( self ):
+ return self.__files_sum_repository
+
def write(self):
""" Main write method. Should be overridden by derived classes. """
raise NotImplementedError()
@@ -57,7 +63,7 @@
self.write_file( os.path.join( dir, code_repository.named_tuple.file_name )
, code_repository.named_tuple.code )
@staticmethod
- def write_file( fpath, content ):
+ def write_file( fpath, content, files_sum_repository=None ):
"""Write a source file.
This method writes the string content into the specified file.
@@ -82,22 +88,32 @@
fcontent_new.append( os.linesep ) #keep gcc happy
fcontent_new = ''.join( fcontent_new )
- if os.path.exists( fpath ):
+ new_hash_value = None
+ if files_sum_repository:
+ new_hash_value = files_sum_repository.get_text_value( fcontent_new )
+ curr_hash_value = files_sum_repository.get_file_value( fname )
+ if new_hash_value == curr_hash_value:
+ writer_t.logger.debug( 'file was not changed( hash ) - done( %f seconds )'
+ % ( time.clock() - start_time ) )
+ return
+ elif os.path.exists( fpath ):
#small optimization to cut down compilation time
f = file( fpath, 'rb' )
fcontent = f.read()
f.close()
if fcontent == fcontent_new:
- writer_t.logger.debug( 'file was not changed - done( %f seconds )'
+ writer_t.logger.debug( 'file was not changed( content ) - done( %f seconds )'
% ( time.clock() - start_time ) )
return
else:
- writer_t.logger.debug( 'file does not exist' )
+ writer_t.logger.debug( 'file changed or it does not exist' )
writer_t.create_backup( fpath )
f = file( fpath, 'w+b' )
f.write( fcontent_new )
f.close()
+ if new_hash_value:
+ files_sum_repository.update_value( fname, new_hash_value )
writer_t.logger.info( 'file "%s" - updated( %f seconds )' % ( fname, time.clock() - start_time ) )
def get_user_headers( self, creators ):
Modified: pyplusplus_dev/pyplusplus/module_builder/builder.py
===================================================================
--- pyplusplus_dev/pyplusplus/module_builder/builder.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/pyplusplus/module_builder/builder.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -303,7 +303,11 @@
self.__merge_user_code()
file_writers.write_file( self.code_creator, file_name )
- def split_module(self, dir_name, huge_classes=None, on_unused_file_found=os.remove):
+ def split_module( self
+ , dir_name
+ , huge_classes=None
+ , on_unused_file_found=os.remove
+ , use_files_sum_repository=False):
"""
Writes module to multiple files
@@ -314,13 +318,30 @@
@param on_unused_file_found: callable object that represents the action that should be taken on
file, which is no more in use
+
+ @use_files_sum_repository: Py++ can generate file, which will contain md5 sum of every generated file.
+ Next time you generate code, md5sum will be loaded from the file and compared.
+ This could speed-up code generation process by 10-15%.
"""
self.__merge_user_code()
+
+ files_sum_repository = None
+ if use_files_sum_repository:
+ cache_file = os.path.join( dir_name, self.code_creator.body.name + '.md5.sum' )
+ files_sum_repository = file_writers.cached_repository_t( cache_file )
+
written_files = []
if None is huge_classes:
- written_files = file_writers.write_multiple_files( self.code_creator, dir_name )
+ written_files = file_writers.write_multiple_files(
+ self.code_creator
+ , dir_name
+ , files_sum_repository=files_sum_repository )
else:
- written_files = file_writers.write_class_multiple_files( self.code_creator, dir_name, huge_classes )
+ written_files = file_writers.write_class_multiple_files(
+ self.code_creator
+ , dir_name
+ , huge_classes
+ , files_sum_repository=files_sum_repository )
all_files = os.listdir( dir_name )
all_files = map( lambda fname: os.path.join( dir_name, fname ), all_files )
Modified: pyplusplus_dev/unittests/algorithms_tester.py
===================================================================
--- pyplusplus_dev/unittests/algorithms_tester.py 2007-04-21 17:21:44 UTC (rev 1012)
+++ pyplusplus_dev/unittests/algorithms_tester.py 2007-04-22 08:41:55 UTC (rev 1013)
@@ -184,7 +184,8 @@
mb.build_code_creator('x_class_multi')
mb.split_module( autoconfig.build_dir
, [ mb.class_( '::tester::x' ) ]
- , on_unused_file_found=lambda fpath: fpath )
+ , on_unused_file_found=lambda fpath: fpath
+ , use_files_sum_repository=True)
class split_sequence_tester_t(unittest.TestCase):
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|