From: <sg...@us...> - 2003-09-27 22:37:52
|
Update of /cvsroot/libfunutil/libfunutil/lib/s11n/parsers In directory sc8-pr-cvs1:/tmp/cvs-serv8145/lib/s11n/parsers Added Files: .indent.pro Makefile common_flex_definitions.in flex_lexers.cpp flex_lexers.h funtxt.flex.at funxml.flex.at select_lexer.flex.at simplexml.flex.at Log Message: egg --- NEW FILE: .indent.pro --- -i8 -bl -bli0 -sob0 --use-tabs --line-length 200 --no-space-after-function-call-names --space-after-parentheses //--no-space-after-parentheses // --blank-lines-after-declarations --dont-break-function-decl-args --dont-break-function-decl-args-end //--dont-break-procedure-type --no-parameter-indentation --leave-optional-blank-lines --- NEW FILE: Makefile --- include toc.make SOURCES = \ flex_lexers.cpp HEADERS = \ flex_lexers.h INSTALL_PACKAGE_HEADERS = $(HEADERS) FLEXES = funtxt funxml select_lexer simplexml SOURCES_FLEX = $(addsuffix .flex.cpp,$(FLEXES)) FLEXES_ARGS = -p -+ funtxt_FLEXES_ARGS = -B -Psertxt funxml_FLEXES_ARGS = -B -Pserxml cli_FLEXES_ARGS = -B -Pcli select_lexer_FLEXES_ARGS = -B -Pselect_lexer simplexml_FLEXES_ARGS = -B -Psimplexml include $(toc_makesdir)/flex.make COMMON_FLEX_DEFS = common_flex_definitions.in flex_definitions = include:COMMON_DEFINITIONS=$(COMMON_FLEX_DEFS) funtxt.flex: funtxt.flex.at $(COMMON_FLEX_DEFS) Makefile @$(call toc_atparse_file,$<,$@, \ $(flex_definitions) \ ) || exit; touch $@ funxml.flex: funxml.flex.at $(COMMON_FLEX_DEFS) Makefile @$(call toc_atparse_file,$<,$@, \ $(flex_definitions) \ ) || exit; touch $@ select_lexer.flex: select_lexer.flex.at $(COMMON_FLEX_DEFS) Makefile @$(call toc_atparse_file,$<,$@, \ $(flex_definitions) \ ) || exit; touch $@ simplexml.flex: simplexml.flex.at $(COMMON_FLEX_DEFS) Makefile @$(call toc_atparse_file,$<,$@, \ $(flex_definitions) \ ) || exit; touch $@ %.flex: Makefile OBJECTS = $(patsubst %.cpp,%.o,$(SOURCES) $(SOURCES_FLEX)) INSTALL_PACKAGE_HEADERS = $(HEADERS) # DIST_FILES += $(sort $(wildcard *.cpp) $(wildcard *.h) $(wildcard *.make)) DIST_FILES += $(SOURCES) 
$(HEADERS) \ $(addsuffix .flex.at,$(FLEXES)) $(COMMON_FLEX_DEFS) strip: $(THISLIB_STATIC) $(THISLIB_SHARED) $@ $(THISLIB_STATIC) $(THISLIB_SHARED) SYMLINK_HEADERS = $(INSTALL_PACKAGE_HEADERS) SYMLINK_HEADERS_DEST = $(top_srcdir)/include/s11n include $(toc_makesdir)/symlink_headers.make all: symlink-headers $(OBJECTS) select_lexer_bin_SOURCES = select_lexer.flex.cpp select_lexer_bin_CFLAGS = $(INCLUDES) -DSELECT_LEXER_DO_MAIN=1 -g select_lexer_bin_OBJECTS = $(OBJECTS) select_lexer_bin_LFLAGS = -lstdc++ $(LIBREADLINE_LDADD) select_lexer: select_lexer.flex all Makefile $(call toc_compile_c_binary,select_lexer) simplexml_bin_SOURCES = simplexml.flex.cpp simplexml_bin_CFLAGS = $(INCLUDES) -DSIMPLEXML_DO_MAIN=1 -g simplexml_bin_OBJECTS = $(OBJECTS) simplexml_bin_LFLAGS = -lstdc++ $(LIBREADLINE_LDADD) simplexml: simplexml.flex all Makefile $(call toc_compile_c_binary,simplexml) DIST_FILES += $(SOURCES) $(HEADERS) \ $(addsuffix .flex.at,$(FLEXES)) $(COMMON_FLEX_DEFS) CLEAN_FILES += *.o --- NEW FILE: common_flex_definitions.in --- SPACE [ \t] NONSPACE [^ \t] WORD [_[:alnum:]]+ WORDS (({WORD}{SPACE}){2,}) START_OF_LINE ^({SPACE}*) DIGIT [0-9] INTEGER ({DIGIT}+) DOUBLE_QUOTED_STRING ([\"]([^\"]|(\\\"))+[\"]) SINGLE_QUOTED_STRING ([\'][^\']*[\']) QUOTED_STRING ({SINGLE_QUOTED_STRING}|{DOUBLE_QUOTED_STRING}) // QUOTED_STRING: doesn't yet handle escaped quotes-in-quotes. Need to use //# separate states for that, i think. NUMBER_type1 [-+]?{DIGIT}+\.?([eE][-+]?{DIGIT}+)? NUMBER_type2 [-+]?{DIGIT}*\.{DIGIT}+([eE][-+]?{DIGIT}+)? 
NUMBER ({NUMBER_type1}|{NUMBER_type2}) CLASSNAME (({WORD}\:\:)+)?{WORD} ALMOST_A_WORD [\._a-zA-Z0-9]+ VARNAME_LENIENT ([a-zA-Z_][\.\-_a-zA-Z0-9]*|{CLASSNAME}) HEX_DIGIT ([a-fA-F0-9]) RGB_COLOR (#{HEX_DIGIT}{6}) SEMICOLON ({SPACE}*;+{SPACE}*) // ESCAPED_MULTILINE ((.*\$)/[^(\\\n)]*) // ESCAPED_MULTILINE ([.]+([^\\]\n$)) // {ESCAPED_MULTILINE}| // PROPERTY_TYPES ({ESCAPED_MULTILINE}) //UNTIL_SEMICOLON .+\;{SPACE}*$ //PROPERTY_VALUE ({NUMBER}|{ALMOST_A_WORD}|{QUOTED_STRING}|{WORD_WITH_PUNCTUATION}|{RGB_COLOR}) // WORD_WITH_PUNCTUATION [#.\!\?\-_a-zA-Z0-9]+ //UNTIL_EOL ([.\n]+[^\\]$) UNTIL_SEMICOLON (.+;) //ESCAPED_LINES ((.+([\\]\n))+[^\\]\n) PROPERTY_DECL_RULES ({QUOTED_STRING}|{ALMOST_A_WORD}|{WORDS}|{RGB_COLOR}) PROP_DECL_EQUALS ({WORD}{SPACE}*={SPACE}*) PROP_DECL_SPACE ({WORD}{SPACE}+) PROPERTY_DECLS ({PROP_DECL_EQUALS}|{PROP_DECL_SPACE}) PROPERTY_DEFINITION ({PROPERTY_DECLS}{PROPERTY_DECL_RULES}) // \<[^/][^\>]+\> { add_token( elib::efstring( "opening ",YYText() ) ); } // \<\/[^\>]+\> { add_token( elib::efstring( "closing ",YYText() ) ); } --- NEW FILE: flex_lexers.cpp --- /** Author: stephan beal <sg...@us...> License: Public Domain */ #include "flex_lexers.h" #include <cassert> #include <s11n/s11n-macros.h> // COUT /** stubs for dupe code from the lexers... 
*/ namespace s11n { FlexTreeBuilder::FlexTreeBuilder( ):m_lexer( 0 ), m_lexer_inited( false ), m_builder( 0 ) { } FlexTreeBuilder::~FlexTreeBuilder( ) { this->cleanup( ); } FlexLexer *FlexTreeBuilder::lexer( ) { //COUT << "lexer() @ " << std::hex<< m_lexer<< ""<<std::endl; return this->m_lexer; } void FlexTreeBuilder::lexer( FlexLexer * newlexer ) { delete( this->m_lexer ); this->m_lexer = newlexer; } s11n::S11nNode * FlexTreeBuilder::root_node( ) { if ( !this->m_builder ) return NULL; return this->m_builder->root_node( ); } int FlexTreeBuilder::lexer_loop( ) { FlexLexer *l = this->lexer( ); int ret; //COUT << "lexer_loop()"; while ( 0 != ( ret = l->yylex( ) ) ) { //std::cout << (char) ret; } //std::cout << std::endl; return ret; } void FlexTreeBuilder::reset( ) { if ( m_builder ) delete( m_builder ); m_builder = new s11n::S11nNodeBuilder( ); m_lexer_inited = true; } void FlexTreeBuilder::cleanup( ) { delete( m_builder ); m_builder = 0; m_lexer_inited = false; delete( this->m_lexer ); this->m_lexer = 0; } s11n::S11nNodeBuilder * FlexTreeBuilder::builder( ) { if ( !m_lexer_inited ) this->reset( ); return m_builder; } bool FlexTreeBuilder::load( std::istream & in ) { // UNTESTED if ( !in ) return false; FlexLexer *fl = this->lexer( ); fl->switch_streams( &in ); int ret; //COUT << "load( istream & )... "; ret = this->lexer_loop( ); //COUT << "done." 
<< endl; //COUT << "ret="<<ret<<endl; if ( 0 != ret ) { this->reset( ); } return ( 0 == ret ); } } // namespace s11n --- NEW FILE: flex_lexers.h --- /** Author: stephan beal <sg...@us...> License: Public Domain */ #ifndef FLEX_FUNTXT_H_INCLUDED #define FLEX_FUNTXT_H_INCLUDED 1 #include <string> #include <iostream> #ifndef FLEX_SCANNER #include <FlexLexer.h> #endif #include <s11n/S11n.h> #include <s11n/S11nNodeBuilder.h> #define LEXER_LOUD 1 #if LEXER_LOUD # include <s11n/s11n-macros.h> // COUT/CERR #endif #define LOUT if(LEXER_LOUD) CERR #define lout if(LEXER_LOUD) cerr namespace s11n { /** FlexTreeBuilder declares an EXPERIMENTAL quasi-interface into the libfun/s11n-related flex parsers. It is meant to be included by the parsers and their clients. The ground rules are described in the FlexTreeBuilder struct, but the same notes apply to all the lexers which subclass it. (Why all this hassle? Because getting multiple flex-based parsers into one library is more painful than it really ought to be, and the FlexLexer subclasses must be well-hidden from clients to avoid potential compilation problems.) Much of this code is implemented in flex_lexers.cpp, but some of it must be implemented by the specific lexers and lives elsewhere. Big Fat Warning: This interface is COMPLETELY not thread-safe! Much of the internal data used by the lexers is global-scope and multiple lexers, even of different types, should NOT be used in parallel! This can be considered a bug, and this limitation may be addressed (i.e., put off again) at some point. Usage notes: - You must never delete any lexer you fetch via lexer(). - You should call FlexTreeBuilder::cleanup() when you're done so the tree builder can be deleted. Deleting a builder will clean it up, obviously. If you need to hold on to it's output you can call builder()->autoDelete(false) to keep it from deleting the parsed-in node(s). 
- As in XML, subclasses should assume that each input block contains
   exactly one root node, and stop parsing after processing one node.
   (This is purely a point of long-standing conventions.)

   Developer notes:

   - see the samples in {cli,fun{txt,xml}}.flex.at.

   todo: investigate making this a subclass of S11nNodeBuilder.
*/
class FlexTreeBuilder
{
	// implementation lives in flex_lexers.cpp
      public:
	virtual ~ FlexTreeBuilder( );

	/**
	   FlexLexer * lexer()

	   Returns the FlexLexer parser set via lexer( FlexLexer * ),
	   which subclasses should call to set the proper lexer. The
	   default behaviour is to return NULL. This object is
	   absolutely useless until the lexer() is set.

	   The caller DOES NOT own the returned pointer.

	   lexer implementors: you may want to do any one-time
	   initialization of your lexer in the first call to lexer().
	   This is the only function which provides public access to
	   your lexer, so doing any initialization here will ensure that
	   it gets called before the lexer is actually used. Typically a
	   call to reset() should be done here, as that creates a new
	   internal tree builder (see builder(), below).

	   By convention (the one set by FunXML and FunTxt, that is), a
	   lexer should generally stop lexing after reading one DOM
	   node. This assumes that, as in XML, there is one root node in
	   any input. This model appears to work well for a wide variety
	   of cases (the majority, i would say), so please don't let
	   this minor limitation bother you (if it makes it any better,
	   it's really up to the lexer what defines "a root node", and
	   it could even artificially wrap its output in an artificial
	   root if it needed to). That said, the S11nNodeBuilder
	   interface /should/ work with multiple root nodes, but it has
	   only vaguely been tested.

	   After a lexer()->yylex() loop, if parsing got a root node it
	   will be available via root_node().
	*/
	virtual FlexLexer *lexer( );

	/**
	   This function is simply a shortcut for
	   builder()->root_node().

	   The default condition is that the caller does not own the
	   returned pointer. The caller owns the given pointer if
	   this->builder()->autoDelete() returns false, otherwise the
	   pointer will be deleted when this object is cleaned up (via
	   cleanup() or deletion).

	   Typically clients should call this after a lexer()->yylex()
	   loop to fetch the object the parser collected.
	*/
	s11n::S11nNode * root_node( );

	/**
	   Returns the current dom builder, which gets (or should get)
	   populated by this object's lexer(). You can get much more
	   info about the tree via this object, but it is rarely
	   necessary to do so.

	   The caller does not own the returned pointer.
	*/
	s11n::S11nNodeBuilder * builder( );

	/**
	   Resets the parser to use a new builder. This deletes the
	   object returned by builder(), so be careful not to hang on
	   to that pointer too long.

	   It is intended to be used by subclasses but may have uses in
	   other contexts.
	*/
	virtual void reset( );

	/**
	   Should be called after you are done lexing and retrieving
	   the nodes so the lexer can free the builder (which also
	   frees any loaded nodes).
	*/
	virtual void cleanup( );

	/**
	   Convenience function: does a while(lexer()->yylex() != 0)
	   loop and returns the last code returned by yylex(). If
	   parsing gets a root node it will be available via
	   root_node() or via the builder() object.
	*/
	int lexer_loop( );

	/**
	   Convenience function: parses the given input stream and
	   stops parsing when this->lexer() is done with it. This
	   normally means it stops after matching one input node. Note
	   that the "one node rule" is conventional only: subclasses
	   may choose to parse more than one node out of the input
	   stream before returning. s11n convention, however, is to
	   assume that any loaded data is contained in one root node.

	   If the load fails then it is still possible that the
	   builder() has some content, but it may be in an undefined
	   state.
	*/
	bool load( std::istream & in );

	/**
	   Subclasses need to call this, passing their proper lexer
	   type. It will delete any previously-assigned object.

	   It may be useful for non-subclasses to call this, but
	   certainly only for unusual or particularly complex cases.
	*/
	void lexer( FlexLexer * lexer );

	/**
	   loadBuilder() looks at the first line of the given file and
	   tries to select an appropriate lexer for the token. It
	   returns a pointer, perhaps NULL, which the caller takes
	   responsibility for.

	   If it finds a lexer but it finds no parseable tokens it will
	   return NULL.

	   If it finds a lexer it passes the file stream to the lexer,
	   which will then populate the new builder object. If that
	   fails then the builder is deleted and NULL is returned.

	   Developer notes:

	   This function WOULD take an istream, but i can't(?)
	   reliably(?) read from it twice (once to get the magic cookie
	   and once to feed the whole thing to the parser). Reading
	   JUST the cookie, then passing the stream on to a different
	   lexer for parsing screws up the stream (i'm theorizing that
	   it's a side-effect of FlexLexer::switch_streams()'s deletion
	   behaviour, but haven't investigated it fully).

	   This function is implemented in select_lexer.flex.at.
	*/
	static FlexTreeBuilder *loadBuilder( const std::string & filename );

      protected:
	/**
	   Only subclasses may instantiate objects of this class.
	*/
	FlexTreeBuilder( );

      private:
	/**
	   Not copyable: this object deletes m_lexer and m_builder when
	   it is cleaned up, so a member-wise copy would lead to both
	   copies deleting the same pointers. Declared but intentionally
	   not implemented.
	*/
	FlexTreeBuilder( const FlexTreeBuilder & );
	FlexTreeBuilder & operator=( const FlexTreeBuilder & );

	FlexLexer * m_lexer; // owned
	bool m_lexer_inited;
	s11n::S11nNodeBuilder * m_builder; // owned
};

/**
   FunTxt is for parsing what's known variably as "text", "serial-text"
   and "Rusty's" format. It's a simple-grammared text-based DOM which
   is easy for humans to read and hand-edit, as well as fairly easy to
   parse.

   This format is used by the QUB project (qub.sourceforge.net) and
   libFunUtil (libfunutil.sourceforge.net).
*/ class FunTxt:public FlexTreeBuilder { // implementation lives in FunTxt.flex.at public: FunTxt( ) { }; virtual ~ FunTxt( ) { }; virtual FlexLexer *lexer( ); virtual void reset( ); }; /** FunXML is for parsing what's sometimes affectionately known as "fun-xml format", which is a very limited dialect of XML used extensively by the QUB project (qub.sourceforge.net) and libFunUtil (libfunutil.sourceforge.net). Known "Potential Problems": - does not yet properly translate any characters, like > and <. i.e., it doesn't translate at all. */ class FunXML:public FlexTreeBuilder { // implementation lives in FunXML.flex.at public: FunXML( ) { }; virtual ~ FunXML( ) { }; virtual FlexLexer *lexer( ); virtual void reset( ); }; /** This flexer parses a more complete dialect of XML than FunXML. Because S11nNode uses key=val properties to store all aribtrary data, and does not directly support the notion of CDATA, this class does some special handling of the "CDATA" property in deserialized nodes, and the SerializerSimpleXML does the appropriate fiddling on the other end. i don't want to add a cdata-style type to the lower-level interface because it would only be there to accomodate XML, yet clients should not need to know they are working with XML. That said, however, XML-specific clients which use this class need to be aware that the CDATA is stored in the CDATA property of S11nNodes which are deserialized by this class (e.g., available via node.getString("CDATA")). */ class SimpleXMLFlexer:public FlexTreeBuilder { // implementation lives in simplexml.flex.at public: SimpleXMLFlexer( ) { }; virtual ~ SimpleXMLFlexer( ) { }; virtual FlexLexer *lexer( ); virtual void reset( ); }; }; // namespace s11n #endif // FLEX_FUNTXT_H_INCLUDED --- NEW FILE: funtxt.flex.at --- %option c++ %{ /** @LICENSE */ /** my first lex :) This code is for parsing Rusty Ballinger's "text mode serialization format" (as we call it). 
Here's a rough spec: nodename class=ClassName { property1_name property1_value property2_name "value with spaces" propertyN_name 'or single quotes' foo this \ line is split \ with backslashes. # comment lines nodename class=SomeClass { ... } } Extensions to the original format, probably not tolerated by older code and should probably be left out: - properties can be declared with an optional '=' between the key and value, if that makes you feel better. - Trailing semicolons are optional after '}' and properties. - comment lines can now start with ; or # or // - C++-style comment blocks are supported. See node_functions.h for the callbacks which this parser expects to send output to. Known problems: - If node_depth() is non-zero when this code starts parsing then it does not work properly at all. */ #define YY_SKIP_YYWRAP 1 int yywrap() { return 1; } // #include <stdio.h> #include <cassert> #include <iostream> #include <string> #include <deque> #include <s11n/s11n-macros.h> // COUT/CERR #include <s11n/s11n_globals.h> // trim_string() #include <s11n/PropertiesProvider.h> #include <s11n/ClassLoader.h> #include <s11n/Instantiator.h> #include <s11n/KeyValueParser.h> #include <s11n/S11n.h> // S11nNode and friends. #include <s11n/S11nNodeBuilder.h> #include <s11n/flex_lexers.h> using std::cin; using std::cerr; using std::cout; using std::endl; /** big todos: - use proper lex STATES instead of assert_depth, if feasible. - add syntax extension: nodename class=SomeClass N where N is a number. That is, the node is replicated N times into the output. - add syntax extension: # nodename class=Foo will comment out the whole class block. 
*/
/** File-scope state shared between the funtxt rules and FunTxt::reset(). */
namespace FunTxti {
	unsigned long ignored;		// number of tokens skipped (comments, out-of-depth properties)
	unsigned long bracedepth;	// number of currently-open '{'
	s11n::S11nNodeBuilder * serbuilder; // set by FunTxt::reset(); not owned here
	std::string nodename;
	std::string nodeclass;
	static s11n::KeyValueParser kvp;

	/**
	   Splits str into key/value using kvp. The delimiter is "=" if
	   str contains one, else a single space. The value is passed
	   through s11n::normalize_string() and one trailing ';' is
	   stripped. Returns false if kvp.parse() fails; on success the
	   result is available via kvp.key()/kvp.value().
	*/
	bool parseKVP( const std::string & str )
	{
		std::string val = str;
		s11n::trim_string( val );
		std::string delim = " ";
		if( val.find( "=" ) != val.npos /* < val.find( " " ) */ ) delim = "=";
		//LOUT << "parseKVP:=["<<val<<"] delim='"<<delim<<"'"<<endl;
		if( ! kvp.parse( val, delim ) ) return false;
		val = kvp.value();
		s11n::normalize_string( val );
		// An empty value must not be indexed: val.size()-1 would wrap around.
		if( !val.empty() && ';' == val[val.size()-1] ) val.resize( val.size()-1 );
		//LOUT << "stripped value=["<<val<<"] from ["<<kvp.value()<<"]"<<endl;
		kvp.value( val );
		return true;
	}
} // namespace FunTxti
// FunTxt_internal FunTxti;

// Skips the current token (counting it as ignored) when the brace depth
// and the builder's node depth disagree.
#define assert_depth if( FunTxti::bracedepth != FunTxti::serbuilder->node_depth() ) { ++FunTxti::ignored; return 1; }

namespace s11n {
	/** Returns the lexer, creating it via reset() on the first call. */
	FlexLexer * FunTxt::lexer()
	{
		FlexLexer * fp = this->FlexTreeBuilder::lexer();
		if( fp ) return fp;
		// else first-time setup:
		this->reset();
		return this->FlexTreeBuilder::lexer();
	}

	/** Installs a fresh builder and a fresh sertxtFlexLexer and zeroes the parse counters. */
	void FunTxt::reset()
	{
		this->FlexTreeBuilder::reset();
		//this->lexer( new sertxtFlexLexer() );
		// gcc 3.3 bitches about this.no matching function for call to `FunTxt::lexer(FlexLexer*&)'
		FlexLexer * foo = new sertxtFlexLexer();
		this->FlexTreeBuilder::lexer( foo );
		FunTxti::serbuilder = this->builder();
		FunTxti::ignored = 0;
		FunTxti::bracedepth = 0;
	}
} // namespace s11n
%}

@COMMON_DEFINITIONS@
	// COMMON_DEFINITIONS: See common_flex_definitions.in

%%

"/*""*"* {
	// C-style comment block: swallow input up to and including the
	// closing "*" "/" pair. Code originally taken from the flex info pages.
	//
	// yyinput() may report end of input as 0 or as a negative value
	// (EOF), so stop on either; testing only for 0 made an
	// unterminated comment loop forever.
	++FunTxti::ignored;
	int c;
	// The pattern may already have consumed extra '*'s (e.g. the
	// second '*' of an empty comment), so the next '/' can
	// terminate the comment right away.
	int prev = ( YYLeng() > 2 ) ? '*' : 0;
	while( (c = yyinput()) > 0 )
	{
		if( '*' == prev && '/' == c ) break;
		prev = c;
	}
	return 1;
}

({SPACE}*)([;#]|\/).* {
	/* single-line comment */
	// NOTE(review): the format notes mention "//" as the comment
	// marker, but this pattern accepts a single '/' -- confirm intent.
	++FunTxti::ignored;
	return 1;
}

({SPACE}*){WORD}{SPACE}+{WORD}={CLASSNAME} {
	// nodename class=foo::Bar
	std::string foo = YYText();
	s11n::trim_string( foo );
	//LOUT << "class dec token=["<<foo<<"]"<<endl;
	FunTxti::nodename = foo.substr( 0, foo.find_first_of( " \t" ) );
	FunTxti::nodeclass = foo.substr( foo.find( "=" ) + 1 );
	int ret = FunTxti::serbuilder->open_node( FunTxti::nodeclass, FunTxti::nodename ) ? 1 : -1;
	if( (ret == -1) )
	{
		// LOUT << "lexer: fail option is enabled, so i'm failing on the first failed open_node(). error token is:" << endl;
		LOUT << foo << endl;
		return 0;
	}
	//LOUT << FunTxti::bracedepth << " opening class node " << FunTxti::nodeclass << " : " << FunTxti::nodename << std::endl;
	return ret;
}

\{ {
	// node's opening brace
	++FunTxti::bracedepth;
	return 1;
}

\}({SEMICOLON})? {
	// node's closing brace
	if( FunTxti::bracedepth == FunTxti::serbuilder->node_depth() )
	{ // avoid closing node when open_node() fails
		//LOUT << FunTxti::bracedepth-1<<" closing node"<<std::endl;
		FunTxti::serbuilder->close_node();
	}
	--FunTxti::bracedepth;
	if( 0 == FunTxti::serbuilder->node_depth() )
	{ // return once we close the first top-level node.
		//if( FunTxti::ignored ) LOUT << "lexer ignored " << FunTxti::ignored << " token"<<(FunTxti::ignored!=1?"s":"") << endl;
		return 0;
	}
	//LOUT << "node depth="<<FunTxti::serbuilder->node_depth()<<" brace_depth="<<FunTxti::bracedepth<<std::endl;
	//return 0;
}

{PROPERTY_DECLS} {
	// property_name [=]? value
	assert_depth;
	std::string foo = YYText();
	//LOUT << "property decl: " << foo << endl;
	// Read the rest of the line, continuing over \-escaped newlines.
	// Stop at end of input as well (0 or negative EOF): the old
	// "!= 0" test never terminated when the last property line had
	// no trailing newline.
	int c1;
	int c2 = 0;
	while( (c1 = yyinput()) > 0 )
	{
		if( '\n' == c1 && '\\' != c2 ) break;
		foo += static_cast<char>( c1 );
		c2 = c1;
	}
	//LOUT << "property token=["<<foo<<"]"<<endl;
	if( FunTxti::parseKVP( foo ) )
	{
		FunTxti::serbuilder->add_property( FunTxti::kvp.key(), FunTxti::kvp.value() );
		return 1;
	}
	LOUT << "failed parsing key/value pair from property token ["<<foo<<"]"<<endl;
	return -1;
}

.|\n|{SPACE}+ {;}

%%
--- NEW FILE: funxml.flex.at ---
%option c++
%{
/** @LICENSE */
/** */
#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }
#include <cassert>
#include <iostream>
#include <string>
// #include <s11n/s11n-macros.h> // COUT/CERR
#include <s11n/flex_lexers.h>
using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/** File-scope state shared between the funxml rules and FunXML::reset(). */
class FunXML_internal
{
      public:
	unsigned long bracedepth;	// number of currently-open elements (nodes and properties)
	std::string nodename;
	std::string nodeclass;
	std::string yystr;		// scratch copy of the current token
	std::string cdata;		// text collected since the last tag
	s11n::S11nNodeBuilder * builder; // set by FunXML::reset(); not owned here
}; // struct FunXML_internal
FunXML_internal FunXMLi;

namespace s11n {
	/** Returns the lexer, creating it via reset() on the first call. */
	FlexLexer * FunXML::lexer()
	{
		FlexLexer * fp = this->FlexTreeBuilder::lexer();
		if( fp ) return fp;
		// else first-time setup:
		this->reset();
		return this->FlexTreeBuilder::lexer();
	}

	/** Installs a fresh builder and a fresh serxmlFlexLexer and zeroes the depth counter. */
	void FunXML::reset()
	{
		this->FlexTreeBuilder::reset();
		FunXMLi.bracedepth = 0;
		this->FlexTreeBuilder::lexer(new serxmlFlexLexer());
		FunXMLi.builder = this->builder();
	}
} // namespace s11n
%}

@COMMON_DEFINITIONS@
	// COMMON_DEFINITIONS: See common_flex_definitions.in

%%

\<{WORD}{SPACE}+"class=\""{CLASSNAME}"\""{SPACE}*\> {
	// opening a node
	++FunXMLi.bracedepth;
	FunXMLi.yystr = YYText();
	//COUT << "class node? "<<FunXMLi.yystr<<std::endl;
	std::string::size_type opos = FunXMLi.yystr.find( "class=\"" ) + 7;
	std::string::size_type cpos = FunXMLi.yystr.find( "\"", opos );
	FunXMLi.nodeclass = FunXMLi.yystr.substr( opos, cpos - opos );
	FunXMLi.nodename = FunXMLi.yystr.substr( 1, FunXMLi.yystr.find_first_of( " \t\n" ) - 1 );
	FunXMLi.builder->open_node(FunXMLi.nodeclass, FunXMLi.nodename);
	continue;
}

\<{WORD}\> {
	// opening a property.
	// The pattern used to be \<[^/]{WORD}\>, which needs at least two
	// characters between the brackets and therefore never matched
	// one-character names, although the closing-tag rule below
	// accepts them. {WORD} cannot start with '/', so no extra
	// guard is needed.
	FunXMLi.cdata = "";
	++FunXMLi.bracedepth;
}

\<\/{WORD}\> {
	// closing something
	FunXMLi.yystr = YYText();
	if( FunXMLi.bracedepth != FunXMLi.builder->node_depth() )
	{ // closing a property: strip the leading "</" and the trailing ">".
		std::string prop = FunXMLi.yystr.substr( 2, FunXMLi.yystr.size() - 3 );
		FunXMLi.builder->add_property( prop, FunXMLi.cdata );
	}
	else
	{ // closing an object node.
		FunXMLi.builder->close_node();
	}
	--FunXMLi.bracedepth;
	FunXMLi.cdata = "";
	if( 0 == FunXMLi.builder->node_depth() )
	{ // return once we close the first top-level node.
		return 0;
	}
}

.|\n|{SPACE} { FunXMLi.cdata += YYText();}

%%
--- NEW FILE: select_lexer.flex.at ---
%option c++
%{
/** @LICENSE */
/**
   Searches the first line of input (consuming it) for a "magic cookie"
   by which to identify a lexer.
*/
#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }
#include <cassert>
#include <iostream>
#include <string>
#include <sstream>
#include <s11n/s11n-macros.h> // COUT/CERR
// #include <s11n/S11nNodeBuilder.h>
#include <s11n/flex_lexers.h>
#include <s11n/s11n_globals.h> // first_token_of_file()
#if HAVE_CONFIG_H
# include "config.h"
#endif
#if HAVE_ZLIB
# include <zlib.h>
# include <s11n/gzstream.h>
#else
# include <fstream>
#endif
using std::cin;
using std::cerr;
using std::cout;
using std::endl;

namespace s11n {
	/**
	   Set by the select_lexer rules below to the builder matching
	   the magic cookie (or NULL if none matched). Only meaningful
	   while loadBuilder() is running.
	*/
	FlexTreeBuilder * m_selected_builder = 0;

	/**
	   Picks a FlexTreeBuilder subclass based on the first token of
	   the file fname, then has it load() the whole file. Returns
	   the populated builder, which the caller owns, or NULL if no
	   lexer matched, load() failed or no root node was parsed.
	*/
	FlexTreeBuilder * FlexTreeBuilder::loadBuilder( const std::string & fname )
	{
		m_selected_builder = 0;
		std::string token = s11n::first_token_of_file( fname, true );
		//COUT << "loadBuilder() magic cookie="<<token<<std::endl;
		std::istringstream cookiestream(token);
		select_lexerFlexLexer fl;
		fl.switch_streams( &cookiestream, 0 );
		fl.yylex();
		//COUT << "m_selected_builder="<<hex<<m_selected_builder<<""<<std::endl;

		// Take the result out of the global right away so that no
		// stale pointer to a caller-owned (or deleted) object is
		// left behind.
		FlexTreeBuilder * ret = m_selected_builder;
		m_selected_builder = 0;
		if( ! ret ) return NULL;
#if HAVE_ZLIB
		s11n::igzstream is( fname.c_str() );
#else
		std::ifstream is( fname.c_str() );
#endif
		bool killit = false;
		if( ! ret->load( is ) )
		{
			killit = true;
		}
		if( ! ret->root_node() )
		{ // no point, is there?
			killit = true;
		}
		if( killit )
		{
			delete( ret );
			ret = 0;
		}
		//COUT << "selected builder="<<hex<<ret<<""<<std::endl;
		return ret;
	}
} // namespace s11n
%}

@COMMON_DEFINITIONS@
	// COMMON_DEFINITIONS: See common_flex_definitions.in

%%

^"<!DOCTYPE SerialTree>" {
	// fun-xml
	s11n::m_selected_builder = new s11n::FunXML();
	return 0;
}

^"#SerialTree 1" {
	// fun-txt
	s11n::m_selected_builder = new s11n::FunTxt();
	return 0;
}

^"<!DOCTYPE s11n::simplexml>" {
	// s11n
	s11n::m_selected_builder = new s11n::SimpleXMLFlexer();
	return 0;
}

.|\n { s11n::m_selected_builder = NULL; return 0; }

%%

#if SELECT_LEXER_DO_MAIN
int main()
{
	FlexLexer * fl = new select_lexerFlexLexer();
	while( fl->yylex() );
	delete( fl );
}
#endif
--- NEW FILE: simplexml.flex.at ---
%option c++
%{
/** @LICENSE */
/**
   This flexer builds S11nNode trees out of a subset of XML. Most basic
   XML constructs are supported.

   XML CDATA, which is not directly supported by the S11nNode
   interface, is stored in the property named "CDATA", available via
   S11nNode::getString("CDATA").
*/
#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }
#include <cassert>
#include <iostream>
#include <string>
#include <stack>
#include <s11n/s11n_globals.h> // normalize_string()
#include <s11n/flex_lexers.h>
#include <s11n/KeyValueParser.h>
using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/** File-scope state shared between the simplexml rules and SimpleXMLFlexer::reset(). */
class simplexml_internal
{
      public:
	unsigned long bracedepth;	// number of currently-open elements
	std::string nodename;
	std::string nodeclass;
	std::string yystr;		// scratch copy of the current token
	//std::string cdata;
	s11n::S11nNodeBuilder * builder; // set by SimpleXMLFlexer::reset(); not owned here
	typedef std::stack<std::string> StringStack;
	StringStack cdata;		// one text buffer per open element

	/** Clears the strings and empties the cdata stack. */
	void cleanup()
	{
		nodename = "";
		nodeclass = "";
		yystr = "";
		// Pop until empty. Counting up to cdata.size() while
		// popping removed only half of the entries, because the
		// size shrinks as the counter grows.
		while( ! cdata.empty() ) cdata.pop();
		// builder will be freed elsewhere.
	}
}; // struct simplexml_internal
simplexml_internal sxmli;

namespace s11n {
	/** Returns the lexer, creating it via reset() on the first call. */
	FlexLexer * SimpleXMLFlexer::lexer()
	{
		FlexLexer * fp = this->FlexTreeBuilder::lexer();
		if( fp ) return fp;
		// else first-time setup:
		this->reset();
		return this->FlexTreeBuilder::lexer();
	}

	/** Installs a fresh builder and a fresh simplexmlFlexLexer and clears the shared parse state. */
	void SimpleXMLFlexer::reset()
	{
		this->FlexTreeBuilder::reset();
		sxmli.cleanup(); // drop text buffers left over from an earlier, aborted parse
		sxmli.bracedepth = 0;
		FlexLexer * fp = new simplexmlFlexLexer();
		this->FlexTreeBuilder::lexer(fp);
		fp->set_debug( 1 );
		sxmli.builder = this->builder();
	}
} // namespace s11n

static s11n::KeyValueParser sxml_kvp;

/**
   Handles the end of an element: stores the element's collected text
   (if any, after trimming) as its "CDATA" property and closes the
   node. Returns 0 once the top-level node has been closed, which tells
   the calling rule to stop lexing; otherwise returns the builder's
   current node depth.
*/
int sxml_close_node()
{
	// closing something
	// where to store this? if( ! sxmli.cdata.empty() ) sxmli.builder->add_property( "CDATA", sxmli.cdata );
	if( sxmli.bracedepth == sxmli.builder->node_depth() )
	{
		// A stray closing tag can get here with no text buffer on the
		// stack; top() on an empty stack is undefined behaviour.
		if( ! sxmli.cdata.empty() )
		{
			std::string cd = sxmli.cdata.top();
			s11n::trim_string( cd );
			if( ! cd.empty() ) sxmli.builder->add_property( "CDATA", cd );
		}
		sxmli.builder->close_node();
	}
	if( 0 == sxmli.builder->node_depth() )
	{ // return once we close the first top-level node.
		sxmli.cleanup();
		return 0;
	}
	if( ! sxmli.cdata.empty() ) sxmli.cdata.pop(); // same guard as above
	--sxmli.bracedepth;
	return sxmli.builder->node_depth();
}
%}

@COMMON_DEFINITIONS@
	// COMMON_DEFINITIONS: See common_flex_definitions.in

%x IN_ELEM_DECL
%x IN_COMMENT
%x IN_CDATA

%%

	// \<\!.+\n {
	// continue;
	// }

"<?"[^>]+ {;}

"<![CDATA[" { BEGIN IN_CDATA; }

<IN_CDATA>"]]>" { BEGIN 0; }

<INITIAL>^({SPACE}+) {continue;}

<INITIAL>({SPACE}+)$ {continue;}

<IN_CDATA>(.|\n) {
	if( ! sxmli.cdata.empty() ) sxmli.cdata.top() += YYText();
	continue;
}

"<"{WORD} {
	// start of an element: open a node and give it its own text buffer.
	sxmli.yystr = YYText();
	//COUT << "opening element? "<<sxmli.yystr<<std::endl;
	sxmli.nodename = sxmli.yystr.substr( 1 );
	sxmli.builder->open_node("NoClassYet", sxmli.nodename);
	sxmli.cdata.push(std::string());
	++sxmli.bracedepth;
	BEGIN IN_ELEM_DECL;
}

<IN_ELEM_DECL>{WORD}=({QUOTED_STRING}|{RGB_COLOR}|{WORD}|({DIGIT}+)|([a-zA-Z_0-9]+)) {
	// attribute: "class" sets the node's class, anything else becomes a property.
	if( ! sxml_kvp.parse( YYText() ) )
	{
		CERR << "syntax error: could not parse key=value from ["<<YYText()<<"]"<<std::endl;
		sxmli.cleanup();
		return 0;
	}
	std::string attr = sxml_kvp.value();
	s11n::normalize_string( attr );
	std::string k = sxml_kvp.key();
	if( k == "class" )
	{
		sxmli.builder->current_node()->implClass( attr );
	}
	else
	{
		sxmli.builder->add_property( k, attr );
	}
}

<IN_ELEM_DECL>\/{SPACE}*> {
	// self-closing element. Leave the element-declaration state
	// before (possibly) returning.
	BEGIN 0;
	if( 0 == sxml_close_node() ) return 0;
}

<IN_ELEM_DECL>">" {
	// closing element decl.
	//sxmli.yystr = YYText();
	BEGIN 0;
}

<IN_ELEM_DECL>"<" {
	CERR << "syntax error: we found a '<' character inside an element declaration." << std::endl;
	sxmli.cleanup();
	return 0;
}

<IN_ELEM_DECL>(.|\n) {;}

\<\/{WORD}({SPACE}*)\> {
	if( 0 == sxml_close_node() ) return 0;
}

"<!--" {
	//COUT << "entering comment..." << std::endl;
	BEGIN IN_COMMENT;
}

<IN_COMMENT>"<!--" {
	CERR << "syntax error: you may not have comments within comments." << std::endl;
	sxmli.cleanup();
	return 0;
	//BEGIN 0;
}

<IN_COMMENT>"-->" {
	//COUT << "... exiting comment" << std::endl;
	BEGIN 0;
}

<IN_COMMENT>(.|\n) {
	// Swallow comment text. This used to be [.\n], which as a
	// character class matches only a literal '.' or a newline, so
	// every other comment character fell through to flex's default
	// rule and was echoed to the output.
	;
}

.|\n {
	if( ! sxmli.cdata.empty() ) sxmli.cdata.top() += YYText();
}

%%

#if SIMPLEXML_DO_MAIN
#include <s11n/FlexShell.h>
#include <s11n/ELib.h>
using namespace s11n;
int main( int argc, char ** argv )
{
	// ELib::init( argc, argv );
	// FlexShell sh;
	// sh.readline().loadHistory( ELib::s11nHome( "simplexml.history" ) );
	SimpleXMLFlexer xmlflex;
	FlexLexer * lexer = xmlflex.lexer();
	while( 0 != (lexer->yylex() ) );
	// sh.lexer( lexer );
	// while( ! sh.readline( ELib::args().getString("$0") + ":>", FlexShell::dummy_token_callback ).empty() );
	std::cout << endl;
	if( S11nNode * node = xmlflex.root_node() )
	{
		node->serializeProperties( *node );
		COUT << "Re-serialized:\n";
		S11nNodeSerializerSimpleXML::save( *node, std::cout );
		S11nNodeSerializerFunTxt::save( *node, std::cout );
		S11nNodeSerializerFunXML::save( *node, std::cout );
	}
	return 0;
}
#endif
|