From: <bl...@us...> - 2003-07-31 08:00:09
Update of /cvsroot/cpptool/rfta/src/pyrfta/test/rfta
In directory sc8-pr-cvs1:/tmp/cvs-serv22556

Added Files:
	analyser.py node.py parser.py parsertest.py project.py refactoring.py

Log Message:
* added a rough sketch of an attempt to make a generic interface to refactoring operations (to simplify language bindings)
* added a port of the MiniParser framework with a tokenizer and node transform system

--- NEW FILE: analyser.py ---

class Analyser:
    def __init__( self, document ):
        pass

class Symbol:
    def __init__( self ):
        pass

class LocaleVariableDeclaratorExpr:
    def __init__( self ):
        self.declarators = []
    def getBaseType( self ):
        pass
    def getDeclarators( self ):
        return self.declarators

class LocaleVariableDeclarator:
    def getDeclarator( self ):
        pass
    def getBaseTypeSuffix( self ):
        pass
    def getArrayType( self ):
        pass
    # composed type: base + prefix + suffix
    def getType( self ):
        pass
    def getName( self ):
        pass
    def getInitializer( self ):
        pass

class LocaleVariableRef(Symbol):
    def __init__( self, node ):
        self.node = node
        self.occurrences = [ node ]
    def getName( self ):
        return self.node.getName()
    def setName( self ):
        pass
    # composed type: base + prefix + suffix
    def getType( self ):
        pass
    def getDeclarator( self ):
        pass
    def getBaseTypeSuffix( self ):
        pass
    def getArrayType( self ):
        pass
    def getOccurrences( self ):
        return self.occurrences

class FunctionBody(Symbol):
    def __init__( self, node ):
        self.node = node
    def getName( self ):
        pass
    def setName( self ):
        pass
    def getModifiers( self ):
        pass
    def getParameters( self ):
        pass
    def getBody( self ):
        pass

class MemberFunctionBody(FunctionBody):
    def __init__( self, node ):
        self.node = node
    def getClass( self ):
        pass
    def getMemberInitializers( self ):
        pass

--- NEW FILE: node.py ---

# node stuff

class Range:
    def __init__( self, start_index, end_index ):
        self.start_index = start_index
        self.end_index = end_index
    def getStartIndex( self ):
        return self.start_index
    def setStartIndex( self, start_index ):
        self.start_index = start_index
    def getEndIndex( self ):
        return self.end_index
    def setEndIndex( self, end_index ):
        self.end_index = end_index
    def getLength( self ):
        return self.end_index - self.start_index
    def setLength( self, length ):
        self.end_index = self.start_index + length
    def getTextRange( self, text ):
        return text[ self.start_index : self.end_index ]
    def isEmpty( self ):
        return self.getLength() <= 0
    def contains( self, other_range ):
        if self.isEmpty() or other_range.isEmpty() or \
           self.end_index == other_range.start_index:
            return False
        return self.start_index >= other_range.start_index and \
               self.end_index <= other_range.end_index
    def overlap( self, other_range ):
        return other_range.start_index < self.end_index and \
               other_range.end_index > self.start_index
    def __str__( self ):
        return "Range(%d,%d)" % (self.start_index,self.end_index)
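# Usage sketch (hypothetical, not from the original file): ranges are
# half-open [start_index, end_index), so adjacent ranges do not overlap,
# and contains() tests whether self lies within other_range:
#   Range( 0, 5 ).overlap( Range( 3, 8 ) )   # True:  3 < 5 and 8 > 0
#   Range( 0, 5 ).overlap( Range( 5, 8 ) )   # False: adjacent, 5 < 5 fails
#   Range( 0, 4 ).getTextRange( "int x;" )   # 'int '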
class Node:
    def __init__( self, manager ):
        self.manager = manager
        self.source = None
        self.source_range = None
        self.parent = None
    def isFromSource( self ):
        return self.source_range != None
    def getSourceText( self ):
        if self.source_range:
            return self.source_range.getTextRange( self.source )
        return ""
    def getSourceRange( self ):
        return self.source_range
    def setSource( self, source, range ):
        self.source = source
        self.source_range = range
    def accept( self, visitor ):
        pass
    def getParent( self ):
        return self.parent
    def getManager( self ):
        return self.manager
    def getEventManager( self ):
        return self.manager.getNodeEventManager()

class CompositeNode(Node):
    def __init__( self, manager ):
        Node.__init__( self, manager )
        self.children = []
        self.keyed_children = {}
    def getChildren( self ):
        return self.children
    def getChildCount( self ):
        return len( self.children )
    def setChild( self, child, index ):
        self.removeChildAt( index )
        self.insertChildAt( child, index )
    def getChildIndex( self, child ):
        return self.children.index( child )
    def insertChildAt( self, child, index ):
        self.children[ index:index ] = [child]
        child.parent = self
        self.getEventManager().childInsertedAt( self, child, index )
    def insertChildAfter( self, insert_after, child_to_insert ):
        self.insertChildAt( child_to_insert, self.getChildIndex(insert_after) + 1 )
    def insertChildBefore( self, insert_before, child_to_insert ):
        self.insertChildAt( child_to_insert, self.getChildIndex(insert_before) )
    def appendChild( self, child ):
        self.insertChildAt( child, len(self.children) )
    def removeChildAt( self, index ):
        old_child = self.children[index]
        self.children[ index:index+1 ] = []
        old_child.parent = None
        self.getEventManager().childRemovedAt( self, old_child, index )
    def removeChild( self, child ):
        child_index = self.children.index( child )
        self.removeChildAt( child_index )
    def getKeyedChild( self, key ):
        return self.keyed_children[ key ]
    def getOptionalKeyedChild( self, key ):
        return self.keyed_children.get( key, None )
    def setKeyedChild( self, key, child ):
        old_child = self.keyed_children.get( key, None )
        if old_child:
            self.keyed_children[ key ] = child
            old_child.parent = None
            if child:
                child.parent = self
            self.getEventManager().childReplaced( self, key, old_child, child )
        elif child:
            self.keyed_children[ key ] = child
            child.parent = self
            self.getEventManager().childAdded( self, key, child )
    def removeKeyedChild( self, key ):
        if key in self.keyed_children:
            old_child = self.keyed_children[ key ]
            old_child.parent = None
            del self.keyed_children[ key ]
            self.getEventManager().childRemoved( self, key, old_child )
    def acceptChildren( self, visitor ):
        for node in self.keyed_children.values():
            node.accept( visitor )
        for node in self.children:
            node.accept( visitor )

# -------------------------
# Leaf Nodes
# -------------------------

class TextNodeMixIn:
    def __init__( self, text = None ):
        self.text = text
    def getText( self ):
        if self.isFromSource():
            return self.getSourceText()
        else:
            return self.text
    def setText( self, text ):
        self.text = text

class Text(Node,TextNodeMixIn):
    def __init__( self, manager, text = None ):
        Node.__init__( self, manager )
        TextNodeMixIn.__init__( self, text )
    def accept( self, visitor ):
        visitor.visitText( self )

class Identifier(Text):
    def __init__( self, manager, identifier = None ):
        Text.__init__( self, manager, identifier )
    def accept( self, visitor ):
        visitor.visitIdentifier( self )
    def resolve( self ):
        pass

# -------------------------
# Statements Nodes
# -------------------------

class BlockStatement(CompositeNode):
    def __init__( self, manager ):
        CompositeNode.__init__( self, manager )
    def accept( self, visitor ):
        visitor.visitBlockStatement( self )

class IfStatement(CompositeNode):
    def __init__( self, manager, condition_node, then_node, else_node = None ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "condition", condition_node )
        self.setKeyedChild( "then", then_node )
        self.setKeyedChild( "else", else_node )
    def getCondition( self ):
        return self.getKeyedChild( "condition" )
    def setCondition( self, condition_node ):
        self.setKeyedChild( "condition", condition_node )
    def getThen( self ):
        return self.getKeyedChild( "then" )
    def setThen( self, then_node ):
        self.setKeyedChild( "then", then_node )
    def getElse( self ):
        return self.getOptionalKeyedChild( "else" )
    def setElse( self, else_node ):
        self.setKeyedChild( "else", else_node )
    def accept( self, visitor ):
        visitor.visitIfStatement( self )
class DeclarationStatement(CompositeNode):
    def __init__( self, manager, declaration_node ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "declaration", declaration_node )
    def getDeclaration( self ):
        return self.getKeyedChild( "declaration" )
    def setDeclaration( self, declaration_node ):
        self.setKeyedChild( "declaration", declaration_node )
    def accept( self, visitor ):
        visitor.visitDeclarationStatement( self )

class ExpressionStatement(CompositeNode):
    def __init__( self, manager, expression ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "expression", expression )
    def getExpression( self ):
        return self.getKeyedChild( "expression" )
    def setExpression( self, expression ):
        self.setKeyedChild( "expression", expression )
    def accept( self, visitor ):
        visitor.visitExpressionStatement( self )

# --------------------------
# Expressions Nodes
# --------------------------

# generic partially parsed expression
# has identifiers and operators child nodes
class UnspecifiedExpression(CompositeNode,TextNodeMixIn):
    def __init__( self, manager, text = None ):
        CompositeNode.__init__( self, manager )
        TextNodeMixIn.__init__( self, text )
    def accept( self, visitor ):
        visitor.visitUnspecifiedExpression( self )

class DeclarationExpression(CompositeNode):
    def __init__( self, manager, base_type ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "base_type", base_type )
    def getBaseType( self ):
        return self.getKeyedChild( "base_type" )
    def setBaseType( self, base_type ):
        self.setKeyedChild( "base_type", base_type )
    def accept( self, visitor ):
        visitor.visitDeclarationExpression( self )

class Declarator(CompositeNode):
    def __init__( self, manager, name ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "name", name )
    def getName( self ):
        return self.getKeyedChild( "name" )
    def getBaseTypeSuffix( self ):
        return self.getKeyedChild( "base_type_suffix" )
    def setBaseTypeSuffix( self, base_type_suffix ):
        self.setKeyedChild( "base_type_suffix", base_type_suffix )
    def getArrayType( self ):
        return self.getOptionalKeyedChild( "array_type" )
    def setArrayType( self, array_type ):
        self.setKeyedChild( "array_type", array_type )
    def getInitializer( self ):
        return self.getOptionalKeyedChild( "initializer" )
    def setInitializer( self, initializer ):
        self.setKeyedChild( "initializer", initializer )
    def accept( self, visitor ):
        visitor.visitDeclarator( self )

class AssignInitializer(CompositeNode):
    def __init__( self, manager, expression ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "expression", expression )
    def getExpression( self ):
        return self.getKeyedChild( "expression" )
    def setExpression( self, expression ):
        self.setKeyedChild( "expression", expression )
    def accept( self, visitor ):
        visitor.visitAssignInitializer( self )

class ConstructInitializer(CompositeNode):
    def __init__( self, manager, parameters ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "parameters", parameters )
    def getParameters( self ):
        return self.getKeyedChild( "parameters" )
    def setParameters( self, parameters ):
        self.setKeyedChild( "parameters", parameters )
    def accept( self, visitor ):
        visitor.visitConstructInitializer( self )

# ----------------------------
# Declarations
# ----------------------------

class Type(Text):
    def __init__( self, manager, type = None ):
        Text.__init__( self, manager, type )
    def accept( self, visitor ):
        visitor.visitType( self )

# static, virtual, const, volatile, =0 modifiers.
class Modifier(Text):
    def __init__( self, manager, modifier = None ):
        Text.__init__( self, manager, modifier )
    def accept( self, visitor ):
        visitor.visitModifier( self )

class Parameter(CompositeNode):
    def __init__( self, manager, type, name = None, array_type = None, initializer = None ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "type", type )
        self.setKeyedChild( "name", name )
        self.setKeyedChild( "array_type", array_type )
        self.setKeyedChild( "initializer", initializer )
    def getType( self ):
        return self.getKeyedChild( "type" )
    def setType( self, type ):
        self.setKeyedChild( "type", type )
    def getName( self ):
        return self.getOptionalKeyedChild( "name" )
    def setName( self, name ):
        self.setKeyedChild( "name", name )
    def getArrayType( self ):
        return self.getOptionalKeyedChild( "array_type" )
    def setArrayType( self, array_type ):
        self.setKeyedChild( "array_type", array_type )
    def getInitializer( self ):
        return self.getOptionalKeyedChild( "initializer" )
    def setInitializer( self, initializer ):
        self.setKeyedChild( "initializer", initializer )

class Parameters(CompositeNode):
    def __init__( self, manager ):
        CompositeNode.__init__( self, manager )

class FunctionBodyDecl(CompositeNode):
    def __init__( self, manager, body, name, parameters, return_type = None ):
        CompositeNode.__init__( self, manager )
        self.setKeyedChild( "body", body )
        self.setKeyedChild( "name", name )
        self.setKeyedChild( "parameters", parameters )
        if not return_type:
            return_type = Type( manager, "int" )
        self.setKeyedChild( "return_type", return_type )
    def getName( self ):
        return self.getKeyedChild( "name" )
    def setName( self, name ):
        self.setKeyedChild( "name", name )
    def getParameters( self ):
        return self.getKeyedChild( "parameters" )
    def setParameters( self, parameters ):
        self.setKeyedChild( "parameters", parameters )
    def getReturnType( self ):
        return self.getKeyedChild( "return_type" )
    def setReturnType( self, return_type ):
        self.setKeyedChild( "return_type", return_type )
    def getBody( self ):
        return self.getKeyedChild( "body" )
    def setBody( self, body ):
        self.setKeyedChild( "body", body )
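# Smoke-test sketch (hypothetical, not from the original file): the node
# classes expect a manager exposing getNodeEventManager(); the event
# interface of the stubs below is inferred from the calls CompositeNode
# actually makes.
class _StubEventManager:
    def childAdded( self, parent, key, child ): pass
    def childReplaced( self, parent, key, old_child, child ): pass
    def childRemoved( self, parent, key, old_child ): pass
    def childInsertedAt( self, parent, child, index ): pass
    def childRemovedAt( self, parent, old_child, index ): pass

class _StubManager:
    def getNodeEventManager( self ): return _StubEventManager()

if __name__ == '__main__':
    # build a tiny AST: if (x == 0) {}
    manager = _StubManager()
    condition = UnspecifiedExpression( manager, "x == 0" )
    then_block = BlockStatement( manager )
    if_node = IfStatement( manager, condition, then_block )
    print if_node.getCondition().getText()   # -> x == 0
    print if_node.getElse()                  # -> None (no "else" keyed child)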
--- NEW FILE: parser.py ---

import re, string

cpp_keywords = """__asm else main struct __assume enum __multiple_inheritance switch
auto __except __single_inheritance template __based explicit __virtual_inheritance this
bool extern mutable thread break false naked throw case __fastcall namespace true
catch __finally new try __cdecl float noreturn __try char for operator typedef
class friend private typeid const goto protected typename const_cast if public union
continue inline register unsigned __declspec __inline reinterpret_cast using
default int return uuid delete __int8 short __uuidof dllexport __int16 signed virtual
dllimport __int32 sizeof void do __int64 static volatile double __leave static_cast wmain
dynamic_cast long __stdcall while"""

IDENTIFIER = "id"
STRING = "string"
CHAR = "char"
INTEGER = "integer"
FLOAT = "float"
SYMBOL = "symbol"
ERROR = "error"
CPP_COMMENT = "cpp_comment"
C_COMMENT = "c_comment"
PP_DIRECTIVE = "pp_directive"
SPACES = "spaces"
END_STREAM = "<<end>>"

class Token:
    def __init__( self, type, start_pos, end_pos, source ):
        self.type = type
        self.start_pos = start_pos
        self.end_pos = end_pos
        self.source = source
    def getType( self ):
        return self.type
    def getText( self ):
        return self.source[self.start_pos:self.end_pos]
    def getStartPos( self ):
        return self.start_pos
    def getEndPos( self ):
        return self.end_pos
    def getSource( self ):
        return self.source
    def __eq__( self, other ):
        return self.type == other.type \
               and self.start_pos == other.start_pos \
               and self.end_pos == other.end_pos \
               and self.source == other.source
    def __ne__( self, other ):
        return not self.__eq__( other )
    def __repr__( self ):
        return str(self)
    def __str__( self ):
        return "Token(%s, %d-%d = '%s')" % (self.type,self.start_pos,self.end_pos,self.getText())

class CppScanner:
    def __init__( self ):
        patterns = [ ( SPACES, '[\\s]+' ),
                     ( IDENTIFIER, '[A-Za-z_][A-Za-z0-9_]*' ),
                     ( STRING, '"' ),
                     ( CHAR, "'" ),
                     ( INTEGER, '[0-9]+L?' ),
                     ( INTEGER, '0x[0-9A-Fa-f]+L?' ),
                     ( FLOAT, '([0-9]+)?\\.[0-9]+(e[-+]?[0-9]+)?[fFL]?' ),
                     ( FLOAT, '[0-9]+e[-+]?[0-9]+[fFL]?' ),
                     ( SYMBOL, self._makeChoice( ':: << >> <= >= \\|\\| && &= \\|= \\+= -= /= \\*= \\^=' ) ),
                     ( SYMBOL, self._makeChoice( '\\( \\) \\[ \\] \\{ \\} ~ / - \\+ / \\* = \\^ % < > & \\| ; , : \\?' ) ),
                     ( CPP_COMMENT, '//' ),
                     ( C_COMMENT, '/\\*' ),
                     ( PP_DIRECTIVE, '#' ) ]
        self.patterns = []
        for pattern in patterns:
            # print "compiling pattern: ", pattern[0], ':', pattern[1]
            self.patterns.append( ( pattern[0], re.compile(pattern[1]) ) )
        self.keywords = {}
        for keyword in string.split(cpp_keywords):
            self.keywords[keyword] = None
    def _makeChoice( self, choices ):
        return string.join( string.split( choices ), '|' )
    def tokenize( self, text ):
        self.pos = 0
        tokens = []
        last_token_is_error = False
        while self.pos < len(text):
            token = self._readOneToken( text )
            if not token:   # skipped (spaces, comments)
                continue
            if token.getType() == ERROR:
                # collate consecutive error tokens into one
                if last_token_is_error:
                    tokens[-1] = Token( ERROR, tokens[-1].getStartPos(),
                                        token.getEndPos(), token.getSource() )
                    continue
                last_token_is_error = True
            else:
                last_token_is_error = False
            tokens.append( token )
        return tokens
    def _readOneToken( self, text ):
        best_match = (0, None)   # matched_len, matched_pattern
        for pattern in self.patterns:
            match = pattern[1].match( text, self.pos )
            if match:
                matched_len = match.end() - match.start()
                if best_match[0] < matched_len:
                    best_match = (matched_len,pattern)
        token = None
        if best_match[1]:
            token = self._makeToken( best_match[1], self.pos, self.pos + best_match[0], text )
        else:
            token = self._makeErrorToken( self.pos, self.pos+1, text )
        if token:
            self.pos = token.getEndPos()
        return token
    def _makeToken( self, pattern, start_pos, end_pos, text ):
        type = pattern[0]
        if type == SPACES:
            self.pos = end_pos
        elif type == STRING:
            return self._readStringToken( type, end_pos, text, '"' )
        elif type == CHAR:
            return self._readStringToken( type, end_pos, text, "'" )
        elif type == CPP_COMMENT:
            self._skipComment( end_pos, text, '\n' )
        elif type == C_COMMENT:
            self._skipComment( end_pos, text, '*/' )
        elif type == PP_DIRECTIVE:
            return self._readPreprocessorDirective( end_pos, text )
        else:
            if type == IDENTIFIER and text[start_pos:end_pos] in self.keywords:
                type = SYMBOL
            return Token( type, start_pos, end_pos, text )
        return None
    def _skipComment( self, pos, text, end_pattern ):
        end_pos = string.find( text, end_pattern, pos )
        if end_pos == -1:
            self.pos = len(text)
        else:
            self.pos = end_pos + len(end_pattern)
    def _readStringToken( self, type, pos, text, delimiter_char ):
        not_found = True
        while not_found:
            end_pos = string.find( text, delimiter_char, pos )
            if end_pos == -1:
                return self._makeErrorToken( self.pos, len(text), text )
            if text[end_pos-1] != '\\':
                not_found = False
            pos = end_pos + 1
        return Token( type, self.pos, pos, text )
    def _readPreprocessorDirective( self, pos, text ):
        start_pos = pos - 1
        while True:
            next_eol_pos = string.find( text, '\n', pos )
            if next_eol_pos != -1:
                next_comment_pos = string.find( text, '/*', pos, next_eol_pos )
                if next_comment_pos != -1:
                    # skip C comment embedded in the directive
                    self._skipComment( next_comment_pos + 2, text, '*/' )
                    pos = self.pos
                    continue
                if text[next_eol_pos-1] == '\\':
                    # directive continues on next line
                    pos = next_eol_pos + 1
                    continue
                pos = next_eol_pos + 1
            else:
                pos = len(text)
            break
        self.pos = pos
        return Token( PP_DIRECTIVE, start_pos, pos, text )
    def _makeErrorToken( self, start_pos, end_pos, text ):
        return Token( ERROR, start_pos, end_pos, text )
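# Tokenizing sketch (hypothetical, not from the original file): keywords
# come back as SYMBOL tokens, spaces and comments are skipped:
#   scanner = CppScanner()
#   for token in scanner.tokenize( 'int x = 0; // set x' ):
#       print token
#   # -> Token(symbol, 0-3 = 'int'), Token(id, 4-5 = 'x'),
#   #    Token(symbol, 6-7 = '='), Token(integer, 8-9 = '0'),
#   #    Token(symbol, 9-10 = ';')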
class CppParser:
    def __init__( self ):
        pass
    def parse( self, text ):
        scanner = CppScanner()
        self.tokens = scanner.tokenize( text )

class NodeBase:
    def getType( self ):
        pass
    def getChildren( self ):
        pass
    def isLeaf( self ):
        return False
    def __repr__( self ):
        return str(self)
    def prettyPrint( self, margin = None, indentLevel = None ):
        if margin == None:
            margin = 0
        if indentLevel == None:
            indentLevel = 0
        indentBase = " " * margin
        indent = indentBase
        if indentLevel > 0:
            indent += "| " * (indentLevel-1) + "|-"
        print indent + self._getPrettyNodeHeader()
        indentLevel += 1
        for child in self.getChildren():
            if child:
                child.prettyPrint( margin, indentLevel )
            else:
                print indent + "|-NONE"

class Node(NodeBase):
    def __init__( self, type, children = None ):
        self.type = type
        if children:
            self.children = children
        else:
            self.children = []
    def getType( self ):
        return self.type
    def setType( self, type ):
        self.type = type
    def getChildren( self ):
        return self.children
    def addChild( self, child ):
        self.children.append( child )
    def _getPrettyNodeHeader( self ):
        return "Node(%s : %d children)" % (self.type,len(self.children))
    def __str__( self ):
        return "Node(%s, %s)" % (self.type, str(self.children))

class TokenNode(NodeBase):
    def __init__( self, token ):
        self.token = token
    def getType( self ):
        return self.token.getType()
    def getChildren( self ):
        return []
    def isLeaf( self ):
        return True
    def _getPrettyNodeHeader( self ):
        return str(self)
    def __str__( self ):
        return "Token(%s,'%s')" % (self.getType(),self.token.getText())

class Match:
    def __init__( self, success, value = None ):
        self.success = success
        self.value = value
    def __nonzero__( self ):
        return self.success
    def getValue( self ):
        return self.value

class TokenProvider:
    def __init__( self, tokens ):
        self.tokens = tokens
        self.index = 0
        self.states = []
        self.end_stream_token = Token( END_STREAM, 0, len(END_STREAM), "<<end>>" )
    def hasMoreToken( self ):
        return self.index < len(self.tokens)
    def getNextToken( self ):
        if self.hasMoreToken():
            token = self.tokens[ self.index ]
            self.index += 1
        else:
            token = self.end_stream_token
        return token
    def pushState( self ):
        self.states.append( self.index )
    def popState( self ):
        self.index = self.states.pop()
    def commitState( self ):
        self.states.pop()
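# Backtracking sketch (hypothetical, not from the original file): callers
# bracket speculative reads with the TokenProvider state stack:
#   provider.pushState()
#   result = sub_parser.parse( provider )
#   if result:
#       provider.commitState()    # keep the consumed tokens
#   else:
#       provider.popState()       # rewind to the saved position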
class Parser:
    """ Top-level class for all sub-parsers.
    This implements a recursive descent parser with infinite look-ahead
    ( LL(n) parser ).
    Note that Parser also overloads the operators |, >> and [].
    They are not defined here for technical reasons.
    'parser1 | parser2' returns an AlternativeParser( parser1, parser2 ).
    If parser1 is already an AlternativeParser, then parser2 is added to
    the list of alternatives of parser1.
    'parser1 >> parser2' returns a SequenceParser( parser1, parser2 ).
    If parser1 is already a SequenceParser, then parser2 is added to the
    list of sub-parsers of parser1.
    'parser[action]' returns an ActionParser( parser, action ).
    action should be a callable which accepts the node returned by parser
    as parameter. If action is a string, then a NameAction(action) is
    automatically created to set the name of the node returned by parser.
    """
    def __init__( self ):
        self.action_on_match = None   # not used; use a decorator parser instead (ActionParser)
    def parse( self, scanner ):
        """ Called to attempt to parse the next tokens provided by scanner.
        scanner should be a TokenProvider.
        Returns a Match to indicate success or failure; on success, the
        'node tree' representing the match is stored as the 'value' of the Match.
        On match failure, the parser is responsible for restoring the scanner
        to its previous position (see TokenProvider.pushState() &
        TokenProvider.popState()).
        The methods matched() & notMatched() should be called to create the
        Match object.
        """
        pass
    def matched( self, scanner, result_value ):
        """ Called on parse success to create the Match object to return.
        result_value is the node representing the matched parse tree.
        """
        if self.action_on_match:
            result_value = self.action_on_match( result_value )
        return Match( True, result_value )
    def notMatched( self ):
        """ Called on parse failure to create the Match object."""
        return Match( False )
class AlternativeParser(Parser):
    def __init__( self, alt1, alt2 ):
        Parser.__init__( self )
        self.alternatives = [ alt1, alt2 ]
    def parse( self, scanner ):
        for alternative in self.alternatives:
            scanner.pushState()
            result = alternative.parse( scanner )
            if result:
                scanner.commitState()
                return self.matched( scanner, result.getValue() )
            scanner.popState()
        return self.notMatched()
    def addAlternative( self, alt ):
        self.alternatives.append( alt )
        return self
    def __str__( self ):
        stringized = []
        for parser in self.alternatives:
            stringized.append( str(parser) )
        return "( %s )" % string.join( stringized, " | " )

class SequenceParser(Parser):
    def __init__( self, parser1, parser2 ):
        Parser.__init__( self )
        self.subparsers = [ parser1, parser2 ]
        self.node_name = '?'
    def parse( self, scanner ):
        results = Node( self.node_name )
        for subparser in self.subparsers:
            result = subparser.parse( scanner )
            if not result:
                return self.notMatched()
            results.addChild( result.getValue() )
        return self.matched( scanner, results )
    def addSubParser( self, subparser ):
        self.subparsers.append( subparser )
        return self
    def __str__( self ):
        stringized = []
        for parser in self.subparsers:
            stringized.append( str(parser) )
        return "( %s )" % string.join( stringized, " >> " )
class SymbolParser(Parser):
    def __init__( self, text ):
        Parser.__init__( self )
        self.text = text
    def parse( self, scanner ):
        token = scanner.getNextToken()
        if token.getType() != SYMBOL or token.getText() != self.text:
            return self.notMatched()
        return self.matched( scanner, TokenNode( token ) )
    def __str__( self ):
        return '"%s"' % self.text

class ActionParser(Parser):
    def __init__( self, parser, action ):
        Parser.__init__( self )
        self.parser = parser
        self.action = action
    def parse( self, scanner ):
        result = self.parser.parse( scanner )
        if result:
            return self.matched( scanner, self.action( result.getValue() ) )
        return self.notMatched()
    def __str__( self ):
        return "(%s)[action]" % str(self.parser)

class NameAction:
    def __init__( self, name ):
        self.name = name
    def __call__( self, node ):
        node.setType( self.name )
        return node

def name_a( name ):
    return NameAction( name )

def make_p( parser ):
    if type(parser) == type(""):
        return SymbolParser( parser )
    return parser

# defined here so that the Alternative and Sequence classes have already been defined
def Parser__or__( self, other ):
    other = make_p( other )
    if isinstance( self, AlternativeParser ):
        return self.addAlternative( other )
    else:
        return AlternativeParser( self, other )

def Parser__rshift__( self, other ):
    other = make_p( other )
    if isinstance( self, SequenceParser ):
        return self.addSubParser( other )
    else:
        return SequenceParser( self, other )

def Parser__getitem__( self, action ):
    if type(action) == type(""):
        action = NameAction( action )
    return ActionParser( self, action )

def injectParserOperatorMethods( parser_class ):
    parser_class.__or__ = Parser__or__
    parser_class.__rshift__ = Parser__rshift__
    parser_class.__getitem__ = Parser__getitem__

for class_name in [ Parser, AlternativeParser, SequenceParser, SymbolParser, ActionParser ]:
    injectParserOperatorMethods( class_name )
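# Sketch of the injected operators in action (hypothetical lines mirroring
# access_level_spec_p further down; strings are promoted to SymbolParser
# by make_p, and a string action wraps a NameAction):
#   access_p = ( SymbolParser("public") | "protected" | "private" ) >> ":"
#   named_access_p = access_p[ 'access_level' ]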
class OptionalParser(Parser):
    def __init__( self, parser ):
        Parser.__init__( self )
        self.optional_parser = parser
    def parse( self, scanner ):
        scanner.pushState()
        result = self.optional_parser.parse( scanner )
        if result:
            scanner.commitState()
            return self.matched( scanner, result.getValue() )
        scanner.popState()
        return self.matched( scanner, None )
    def __str__( self ):
        return '%s?' % str(self.optional_parser)

class RepeatParser(Parser):
    def __init__( self, min_count, parser ):
        Parser.__init__( self )
        self.min_count = min_count
        self.parser = parser
    def parse( self, scanner ):
        results = Node( '?' )
        repeat_count = 0
        while True:
            scanner.pushState()
            result = self.parser.parse( scanner )
            if not result:
                scanner.popState()
                break
            scanner.commitState()
            results.addChild( result.getValue() )
            repeat_count += 1
        if repeat_count >= self.min_count:
            return self.matched( scanner, results )
        return self.notMatched()
    def __str__( self ):
        if self.min_count == 0:
            return '%s*' % self.parser
        elif self.min_count == 1:
            return '%s+' % self.parser
        return 'repeat(%d,%s)' % (self.min_count,str(self.parser))

class AnyUntil(Parser):
    def __init__( self, until_parser ):
        Parser.__init__( self )
        self.until_parser = until_parser
    def parse( self, scanner ):
        results = Node( '?' )
        while True:
            scanner.pushState()
            result = self.until_parser.parse( scanner )
            scanner.popState()
            if result:
                break
            token = scanner.getNextToken()
            if token.getType() == END_STREAM:   # guard: don't loop past the stream end
                break
            results.addChild( TokenNode( token ) )
        return self.matched( scanner, results )
    def __str__( self ):
        return 'anyuntil( %s )' % str(self.until_parser)

class TerminalParser(Parser):
    def __init__( self, type ):
        Parser.__init__( self )
        self.type = type
    def parse( self, scanner ):
        token = scanner.getNextToken()
        if token.getType() != self.type:
            return self.notMatched()
        return self.matched( scanner, TokenNode( token ) )
    def __str__( self ):
        return 'TOKEN(%s)' % str(self.type)

class ZeroParser(Parser):
    def __init__( self ):
        Parser.__init__( self )
    def parse( self, scanner ):
        token = scanner.getNextToken()
        if token.getType() != INTEGER or token.getText() != '0':
            return self.notMatched()
        return self.matched( scanner, TokenNode( token ) )
    def __str__( self ):
        return '0'

class InlineChildAction:
    def __init__( self, node_index ):
        self.node_index = node_index
    def __call__( self, node ):
        inlined_node = node.getChildren()[ self.node_index ]
        node.getChildren()[self.node_index : self.node_index+1] = inlined_node.getChildren()
        return node

class InlineChildrenAction:
    def __call__( self, node ):
        new_children = []
        for child in node.getChildren():
            if child.isLeaf():
                new_children.append( child )
            else:
                new_children.extend( child.getChildren() )
        new_node = Node(node.getType(), new_children)
        return new_node

def inline_child_at_a( node_index ):
    return InlineChildAction( node_index )

def inline_children_a():
    """Takes all the children of the child nodes and brings them up by one level.
    Root
    |-ChildNode1
    | |+ChildNode1.1
    | |+ChildNode1.2
    |-ChildNode2
    | |+ChildNode2.1
    |-LeafNode3
    =>
    Root
    |+ChildNode1.1
    |+ChildNode1.2
    |+ChildNode2.1
    |-LeafNode3
    """
    return InlineChildrenAction()

end_p = TerminalParser( END_STREAM )

def symbol_p( symbol ):
    """ Matches the specified symbol. symbol should be a string. """
    return SymbolParser( symbol )

def maybe_p( parser ):
    """ Matches an optional parser. parser should be a parser. """
    return OptionalParser( make_p( parser ) )

def repeat_p( min_count, parser ):
    """ Matches at least min_count occurrences of parser."""
    return RepeatParser( min_count, parser )[inline_children_a()]

def list_p( repeated_parser, separator_parser ):
    """ Matches a 'list' of 1 or more occurrences of repeated_parser
    separated by separator_parser.
    Examples: list_p( id_p, ',' ) will match 'v1,v2,v3'"""
    return (make_p(repeated_parser) >> \
            repeat_p( 0, make_p(separator_parser) >> \
                      make_p(repeated_parser) ) )[ inline_child_at_a(1) ]
def anyuntil_p( parser ):
    """ Matches any tokens until parser returns a match."""
    return AnyUntil( make_p( parser ) )

id_p = TerminalParser( IDENTIFIER )
eos_p = symbol_p( ';' )

nested_id_p = (maybe_p( "::" ) >> list_p( id_p, symbol_p('::') ))[inline_child_at_a(1)]['nested_id']

# use separate parser for inheritance => allow other alternative matching
#template_instantiation_p = symbol_p("<") >> maybe_p( list_p( any_until_p( ","
#inherited_parent_class_p = nested_id_p >> maybe_p(template_instantiation_p)

null_statement_p = eos_p['null_statement']

basic_type_spec_p = symbol_p('char') | 'wchar_t' | 'bool' | 'short' | 'int' | 'long' \
                    | 'signed' | 'unsigned' | 'float' | 'double' | 'void'
type_p = basic_type_spec_p
type_suffix_p = repeat_p(0, symbol_p('*'))
array_type_p = repeat_p(0, symbol_p('[') >> anyuntil_p( symbol_p(']') ) >> symbol_p(']'))
#array_type_p = repeat_p(0, symbol_p('[') )
optional_initializer_p = maybe_p( symbol_p('=') >> anyuntil_p( symbol_p(',') | eos_p ) )

attribut_modifiers_p = repeat_p( 0, symbol_p('static') | 'mutable' | 'const' | 'volatile' )
attribut_decl_p = attribut_modifiers_p >> type_p >> id_p >> array_type_p >> optional_initializer_p \
                  >> repeat_p( 0, symbol_p(',') >> type_suffix_p >> id_p >> array_type_p >> optional_initializer_p ) \
                  >> eos_p

function_parameters_decl = symbol_p( '(' ) >> ')'   # need to consume everything, balancing braces

cv_modifier_p = symbol_p('const') | 'volatile'
member_initializers_p = symbol_p(':') >> anyuntil_p('{')
statements_p = null_statement_p
block_statement_p = symbol_p( '{' ) >> statements_p >> '}'
function_body_p = block_statement_p
member_function_modifiers_p = symbol_p('static') | 'virtual'
function_decl_p = member_function_modifiers_p >> type_p >> id_p >> function_parameters_decl \
                  >> maybe_p( cv_modifier_p ) >> maybe_p( symbol_p('=') >> ZeroParser() ) \
                  >> ( ( maybe_p(member_initializers_p) >> function_body_p ) \
                       | eos_p )

friend_decl_p = symbol_p('friend') >> anyuntil_p( eos_p ) >> eos_p
access_level_spec_p = (symbol_p("public") | "protected" | "private") >> ':'
class_body_statements_p = null_statement_p \
                          | attribut_decl_p \
                          | access_level_spec_p \
                          | function_decl_p \
                          | friend_decl_p
# enum, nested_class/struct, how ?
class_inheritance_and_body_decl = symbol_p( ":" ) >> anyuntil_p( "{" ) >> class_body_statements_p >> "}"

template_instantiation_param_p3 = anyuntil_p( symbol_p(">") )
template_instantiation_param_p2 = id_p >> "<" >> template_instantiation_param_p3 >> ">"
template_instantiation_param_p = anyuntil_p( symbol_p(",") | template_instantiation_param_p2 ) \
                                 >> maybe_p(template_instantiation_param_p2)
class_specialization_p = symbol_p("<") >> list_p( template_instantiation_param_p, "," ) >> ">"
class_decl_p = (symbol_p("class") | "struct") >> nested_id_p >> maybe_p(class_specialization_p) \
               >> maybe_p( class_inheritance_and_body_decl ) >> eos_p

statements = end_p \
             | eos_p \
             | TerminalParser( PP_DIRECTIVE ) \
             | class_decl_p
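# End-to-end sketch (hypothetical, not from the original file): running
# the grammar over a forward declaration:
#   tokens = CppScanner().tokenize( 'class Foo;' )
#   match = class_decl_p.parse( TokenProvider( tokens ) )
#   if match:
#       match.getValue().prettyPrint()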
--- NEW FILE: parsertest.py ---

import unittest
import mock
import parser
import string

class CppScannerTest(unittest.TestCase):
    """CppScanner tests"""
    def setUp( self ):
        self.scanner = parser.CppScanner()
    def tearDown( self ):
        self.scanner = None
    def testSkipSpaces( self ):
        self.checkEmptyParse( ' ' )
        self.checkEmptyParse( '  ' )
        self.checkEmptyParse( ' \t' )
        self.checkEmptyParse( '\t \n' )
    def testIdentifier( self ):
        self.checkOneToken( 'a', parser.IDENTIFIER )
        self.checkOneToken( 'A', parser.IDENTIFIER )
        self.checkOneToken( '_', parser.IDENTIFIER )
        self.checkOneToken( '_1', parser.IDENTIFIER )
        self.checkOneToken( 'za', parser.IDENTIFIER )
        self.checkOneToken( 'zA', parser.IDENTIFIER )
        self.checkOneToken( 'c3', parser.IDENTIFIER )
        self.checkOneToken( 'c3z', parser.IDENTIFIER )
        self.checkOneToken( 'c3_', parser.IDENTIFIER )
    def testString( self ):
        self.checkOneToken( '""', parser.STRING )
        self.checkOneToken( '"abc"', parser.STRING )
        self.checkOneToken( '"\\""', parser.STRING )
        self.checkOneToken( '"ab\\"cd\\""', parser.STRING )
    def testChar( self ):
        # allows more than an actual 'char', but we don't need to bother about that
        self.checkOneToken( "''", parser.CHAR )
        self.checkOneToken( "'a'", parser.CHAR )
        self.checkOneToken( "'abc'", parser.CHAR )
        self.checkOneToken( "'\\''", parser.CHAR )
    def testInteger( self ):
        self.checkOneToken( '0', parser.INTEGER )
        self.checkOneToken( '123456789', parser.INTEGER )
        self.checkOneToken( '1234L', parser.INTEGER )
        self.checkOneToken( '0x1234L', parser.INTEGER )
    def testFloat( self ):
        self.checkOneToken( '0.0', parser.FLOAT )
        self.checkOneToken( '0.0f', parser.FLOAT )
        self.checkOneToken( '0.0F', parser.FLOAT )
        self.checkOneToken( '0.0L', parser.FLOAT )
        self.checkOneToken( '1344560.7894', parser.FLOAT )
        self.checkOneToken( '1344560.7894e10', parser.FLOAT )
        self.checkOneToken( '1344560.7894e-10', parser.FLOAT )
        self.checkOneToken( '1344560.7894e+10', parser.FLOAT )
        self.checkOneToken( '1344560e+10', parser.FLOAT )
        self.checkOneToken( '.3', parser.FLOAT )
        self.checkOneToken( '.3e10L', parser.FLOAT )
    def testSymbol( self ):
        symbols = string.split( ':: << >> <= >= || && &= |= += -= /= *= ^= ' \
                                '( ) [ ] { } ~ / - + / * = ^ % < > & | ; , : ? ' \
                                'if else break continue for while do void int' )
        for symbol in symbols:
            self.checkOneToken( symbol, parser.SYMBOL )
    def testCppComment( self ):
        self.checkEmptyParse( '//' )
        self.checkEmptyParse( '////' )
        self.checkEmptyParse( '////\n' )
    def testCComment( self ):
        self.checkEmptyParse( '/**/' )
        self.checkEmptyParse( '/* //\n */' )
    def testPreprocessorDirective( self ):
        self.checkOneToken( '#include <vector>', parser.PP_DIRECTIVE )
        self.checkOneToken( '#include <vector>\n', parser.PP_DIRECTIVE )
        self.checkOneToken( '#define macro(x,y) \\n ((x) * (y))\n', parser.PP_DIRECTIVE )
        self.checkOneToken( '#define macro(x,y) \\n /* ((x) * (y))\n // cpp comment\n */ x*y\n', parser.PP_DIRECTIVE )
    def testTokenStream( self ):
        self.checkManyTokens( 'namespace FS = boost::filesystem;', \
                              [ ('namespace',parser.SYMBOL,1),
                                ('FS',parser.IDENTIFIER,1),
                                ('=',parser.SYMBOL,1),
                                ('boost',parser.IDENTIFIER,0),
                                ('::',parser.SYMBOL,0),
                                ('filesystem',parser.IDENTIFIER,0),
                                (';',parser.SYMBOL,0) ] )
    def checkEmptyParse( self, text ):
        tokens = self.scanner.tokenize( text )
        self.assertEqual( [], tokens )
    def checkOneToken( self, text, token_type ):
        expected_tokens = [ parser.Token( token_type, 0, len(text), text ) ]
        actual_tokens = self.scanner.tokenize( text )
        self.assertEqual( expected_tokens, actual_tokens )
    def checkManyTokens( self, text, tokens_def ):
        expected_tokens = []
        pos = 0
        for token_def in tokens_def:
            expected_tokens.append( parser.Token( token_def[1], pos, pos+len(token_def[0]), text ) )
            pos += len(token_def[0]) + token_def[2]
        actual_tokens = self.scanner.tokenize( text )
        self.assertEqual( expected_tokens, actual_tokens )
#        for index in range(0,len(expected_tokens)):
#            self.assertEqual( expected_tokens[index], actual_tokens[index] )

class CppParserTest(unittest.TestCase):
    """CppParserTest tests"""
    def setUp( self ):
        self.scanner = parser.CppScanner()
    def tearDown( self ):
        self.scanner = None
    def testParseBasicParsers( self ):
        self.checkParser( "MyClass", parser.id_p )
        self.checkParser( ",", parser.symbol_p(',') )
        self.checkParser( "MyClass MyItem", parser.repeat_p( 0, parser.id_p ) )
        self.checkParser( "v1,v2,v3", parser.list_p( parser.id_p, ',' ) )
        self.checkParser( "int **myVar[7][12]",
                          parser.type_p >> parser.type_suffix_p >> parser.id_p >> parser.array_type_p )
        self.checkParser( "::NameSpace::MyClass::myVar", parser.nested_id_p )
    def checkParser( self, text, tested_parser ):
        tokens = self.scanner.tokenize( text )
        scanner = parser.TokenProvider( tokens )
        match = tested_parser.parse( scanner )
        self.assert_( match, "parser should have matched the source" )
        remaining_tokens = []
        while scanner.hasMoreToken():
            remaining_tokens.append( scanner.getNextToken() )
        self.assertEqual( [], remaining_tokens )
        if match.getValue():
            print "Parsing '%s'" % text
            match.getValue().prettyPrint( 4 )
        return match.getValue()

def suite():
    return unittest.makeSuite(CppScannerTest)

if __name__ == '__main__':
    unittest.TextTestRunner().run(suite())

--- NEW FILE: project.py ---

import refactoring

class Project:
    def getActiveDocument( self ):
        return None   # to implement
    def getRefactoring( self, refactoring_name ):
        return refactoring.refactoring_classes[ refactoring_name ]( self )

--- NEW FILE: refactoring.py ---

# registry for refactoring classes (name => class)
refactoring_classes = {}

class Refactoring:
    def __init__( self, project ):
        self.project = project
        self.properties = {}
        self.actions = {}
    def setProperty( self, property_name, value ):
        self.properties[ property_name ] = value
    def getProperty( self, property_name ):
        return self.properties[ property_name ]
    def doAction( self, action_name ):
        self.actions[ action_name ]()
    def setAction( self, action_name, action_fn ):
        self.actions[ action_name ] = action_fn
class RenameLocalVariableRefactoring(Refactoring):
    def __init__( self, project ):
        Refactoring.__init__( self, project )
        self.setAction( "analyse", self.analyse )
        self.setAction( "apply", self.apply )
    # input properties:
    #   "document": document containing the variable to rename
    #   "location": index of the local variable (may be at the beginning of, or within, the variable text)
    # output properties:
    #   "count": number of occurrences found
    #   "original_name": local variable name
    def analyse( self ):
        # self.ensurePropertySet( "location" )
        self.analysis = self.project.getAnalysisInfo( self.project.getActiveDocument() )
        self.function_body = self.analysis.getFunctionBodyContainning( self.getProperty( "location" ) )
        # should check that there is a function body
        self.variable_info = self.analysis.getSymbolAt( self.getProperty( "location" ) )
        usages = self.variable_info.getOccurrences()
        new_name = self.getProperty( "new_name" )
        for usage in usages:
            usage.setName( new_name )
    # input properties:
    #   "document": document containing the variable to rename
    #   "location": index of the local variable (may be at the beginning of, or within, the variable text)
    #   "new_name": new local variable name
    # output properties: none
    def apply( self ):
        pass

refactoring_classes["RenameLocalVariable"] = RenameLocalVariableRefactoring
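# Driver sketch (hypothetical, not from the original file): a language
# binding only needs the string-based property/action interface, which is
# the point of the generic Refactoring class. Project.getActiveDocument()
# and the analysis backend are still stubs, so this does not run yet:
#   import project
#   proj = project.Project()
#   rename = proj.getRefactoring( "RenameLocalVariable" )
#   rename.setProperty( "location", 42 )
#   rename.doAction( "analyse" )
#   rename.setProperty( "new_name", "row_index" )
#   rename.doAction( "apply" )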