From: <sg...@us...> - 2003-10-14 20:37:55
|
Update of /cvsroot/libfunutil/libfunutil/lib/s11n/parsers
In directory sc8-pr-cvs1:/tmp/cvs-serv24075/lib/s11n/parsers

Added Files:
	common_flex_definitions.at compact.flex.at hex.flex.at
	paren.flex.at
Log Message:
egg. apparently forgot them last night.

--- NEW FILE: common_flex_definitions.at ---
    // Shared flex name definitions. paren.flex.at pulls them in through its
    // @COMMON_DEFINITIONS@ placeholder.
    // Comment lines in this file are indented on purpose: flex copies indented
    // text of the definitions section verbatim to the generated scanner
    // instead of trying to parse it as a name definition.
SPACE ([ \t])
NONSPACE ([^ \t])
WORD ([_[:alnum:]]+)
WORDS (({WORD}{SPACE}){2,})
START_OF_LINE ^({SPACE}*)
DIGIT ([0-9])
INTEGER ({DIGIT}+)
DOUBLE_QUOTED_STRING ([\"]([^\"]|(\\\"))+[\"])
SINGLE_QUOTED_STRING ([\'][^\']*[\'])
QUOTED_STRING ({SINGLE_QUOTED_STRING}|{DOUBLE_QUOTED_STRING})
    // QUOTED_STRING: doesn't yet handle escaped quotes-in-quotes. Need to use
    //# separate states for that, i think.
NUMBER_type1 ([-+]?{DIGIT}+\.?([eE][-+]?{DIGIT}+)?)
NUMBER_type2 ([-+]?{DIGIT}*\.{DIGIT}+([eE][-+]?{DIGIT}+)?)
NUMBER ({NUMBER_type1}|{NUMBER_type2})
CLASSNAME (({WORD}\:\:)+)?{WORD}
ALMOST_A_WORD [\._a-zA-Z0-9]+
VARNAME ([a-zA-Z_][_a-zA-Z0-9]*)
VARNAME_LENIENT ([a-zA-Z_][\.\-_a-zA-Z0-9]*|{CLASSNAME})
HEX_DIGIT ([a-fA-F0-9])
RGB_COLOR (#{HEX_DIGIT}{6})
SEMICOLON ({SPACE}*;+{SPACE}*)
    // ESCAPED_MULTILINE ((.*\$)/[^(\\\n)]*)
    // ESCAPED_MULTILINE ([.]+([^\\]\n$))
    // {ESCAPED_MULTILINE}|
    // PROPERTY_TYPES ({ESCAPED_MULTILINE})
    //UNTIL_SEMICOLON .+\;{SPACE}*$
    //PROPERTY_VALUE ({NUMBER}|{ALMOST_A_WORD}|{QUOTED_STRING}|{WORD_WITH_PUNCTUATION}|{RGB_COLOR})
    // WORD_WITH_PUNCTUATION [#.\!\?\-_a-zA-Z0-9]+
    //UNTIL_EOL ([.\n]+[^\\]$)
UNTIL_SEMICOLON (.+;)
    //ESCAPED_LINES ((.+([\\]\n))+[^\\]\n)
PROPERTY_DECL_RULES ({QUOTED_STRING}|{ALMOST_A_WORD}|{WORDS}|{RGB_COLOR})
PROP_DECL_EQUALS (({WORD}|{NUMBER}){SPACE}*={SPACE}*)
PROP_DECL_SPACE (({WORD}|{NUMBER}){SPACE}+)
PROPERTY_DECLS ({PROP_DECL_EQUALS}|{PROP_DECL_SPACE})
PROPERTY_DEFINITION ({PROPERTY_DECLS}{PROPERTY_DECL_RULES})
    // \<[^/][^\>]+\> { add_token( elib::efstring( "opening ",YYText() ) ); }
    // \<\/[^\>]+\> { add_token( elib::efstring( "closing ",YYText() ) ); }

--- NEW FILE: compact.flex.at ---
%option c++
%{
//
// LICENSE: Public Domain
// Author: stephan - sg...@us...
//

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <cstdio> // EOF
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "compact.flex error:"

// #include <toolbox/string_util.h> // trim_string()
// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>
#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>
#include <toolbox/string_util.h> // hex2int()

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/**
   Basic grammar spec:

   {NODE_OPEN}{NAME_SIZE}{NODE_NAME}<class_name_size>{CLASSNAME}
   ({PROP_OPEN}<key_size><key><value_size><value>)*
   (sub-nodes)*
   {NODE_CLOSE}

   Sizes are hex-encoded text (2 digits for names, 8 digits for property
   values); the names and values themselves follow as raw characters.

   See the lex source for the meanings of the {TOKENS} named above.
*/
namespace {
        // Scratch state shared by the rule actions below. It is file-local,
        // so all CompactTreeBuilder instances share it.
        unsigned long node_depth = 0;
        unsigned int loops = 0;      // number of chars the current read loop must fetch
        std::string word;            // hex text most recently read by READWORD
        std::string propname;
        std::string propval;
        std::string nodename;
        std::string nodeclass;
        bool in_prop = false;
        unsigned int decval = 0;     // numeric value of 'word'
        unsigned int lcv = 0;        // loop control variable of the read loops
        s11n::node_builder * serbuilder = 0; // builder fed by the actions; set in reset()

        /**
           Returns true if ch, as handed back by yyinput(), marks the end
           of the input. Some flex versions report end-of-file as EOF,
           others as 0, so both are accepted.
        */
        inline bool at_eof( int ch ) { return 0 == ch || EOF == ch; }
}

namespace s11n {

        /**
           Returns this builder's lexer, creating it (via reset()) on
           first use.
        */
        FlexLexer * CompactTreeBuilder::lexer()
        {
                FlexLexer * fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        /**
           Resets the base class, installs a fresh compactFlexLexer and
           points the file-local serbuilder at this object's builder.
        */
        void CompactTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new compactFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }

} // namespace s11n

/*
  READWORD(SZ): reads SZ characters of hex text into 'word' and stores
  their numeric value in 'decval'. On end of input it reports an error and
  makes the enclosing yylex() return 0. Only usable inside rule actions.
*/
#define READWORD(SZ) do { \
        word = ""; \
        for( int rw_i = 0; rw_i < (SZ); ++rw_i ) \
        { \
                const int rw_ch = yyinput(); \
                if( at_eof( rw_ch ) ) { word = ""; PCERR << "Reached EOF during READWORD!" << endl; return 0; } \
                word += static_cast<char>( rw_ch ); \
        } \
        decval = toolbox::hex2int(word); \
} while( 0 )
// if( 0 == decval ) { PCERR << "Error reading word of size " << SZ<<". Maybe reached end of input?" << endl; return 0; }

/*
  READBYTES(DEST,COUNT): replaces the string DEST with the next COUNT raw
  characters of input. Only EOF (never a 0 byte, which may be legal data)
  is treated as end of input; in that case an error is reported and the
  enclosing yylex() returns 0. Only usable inside rule actions.
*/
#define READBYTES(DEST,COUNT) do { \
        (DEST) = ""; \
        loops = (COUNT); \
        for( lcv = 0; lcv < loops; lcv++ ) \
        { \
                const int rb_ch = yyinput(); \
                if( EOF == rb_ch ) { PCERR << "Reached EOF while reading node/property data!" << endl; return 0; } \
                (DEST) += static_cast<char>( rb_ch ); \
        } \
} while( 0 )

%}

HEX_DIGIT ([a-fA-F0-9])
WORD4 ({HEX_DIGIT}{4})

    // maintenance note: these hex codes must be kept in sync with those from HexSerializer's enum
NODE_OPEN f1
NODE_CLOSE f0
PROP_OPEN e1
COOKIE 51191001
DATA_END 51191000

%%

{COOKIE} {;}
{DATA_END} { return 0; }
[ \t\n] {;}

{NODE_OPEN} {
        //COUT << "Opening node." << std::endl;
        READWORD(2); // read node name size
        READBYTES( nodename, decval ); // read node name
        READWORD(2); // get class name size
        READBYTES( nodeclass, decval ); // read class name
        //COUT << "nodename=["<<nodename<<"]"<<"["<<nodeclass<<"]"<<endl;
        if( ! serbuilder->open_node( nodeclass, nodename ) )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
        nodename = nodeclass = "";
}

{NODE_CLOSE} {
        //COUT << "Closing node." << std::endl;
        serbuilder->close_node();
        if( 0 == serbuilder->node_depth() )
        { // stop once we close the first top-level node.
                return 0;
        }
}

{PROP_OPEN} {
        //COUTL( "Opening property" );
        READWORD(2); // prop name size
        READBYTES( propname, decval ); // read property name
        READWORD(8); // get value size
        READBYTES( propval, decval ); // read property's value
        serbuilder->add_property( propname, propval );
        propval = propname = "";
}

. { // catch-all. ('.' must not be bracketed: [.] would only match a literal dot.)
        PCERR << "unexpected token: " << YYText() <<std::endl;
        return 0;
}

%%

#if COMPACT_DO_MAIN
#include <s11n/s11n_io.h> // HexSerializer
// #include <s11n/FlexShell.h>
// #include <s11n/ELib.h>
using namespace s11n;
// Test driver: runs the lexer until yylex() returns 0, then dumps the
// resulting tree to stdout in the "parens" format.
int main( int argc, char ** argv )
{
        s11n::CompactTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
        // FlexLexer * lexer = new compactFlexLexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
                // s11n::CompactSerializer compact;
                // compact.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}
#endif

--- NEW FILE: hex.flex.at ---
%option c++
%{
//
// LICENSE: Public Domain
// Author: stephan - sg...@us...
//

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <cstdio> // EOF
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "hex.flex error:"

// #include <toolbox/string_util.h> // trim_string()
// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>
#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>
#include <toolbox/string_util.h> // hex2int()

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/**
   Basic grammar spec for the "hexed" serialization format:

   {NODE_OPEN}{NAME_SIZE}{NODE_NAME}<class_name_size>{CLASSNAME}
   ({PROP_OPEN}<key_size><key><value_size><value>)*
   (sub-nodes)*
   {NODE_CLOSE}

   Sizes are hex-encoded text (2 digits for names, 8 digits for property
   values); every character of a name or value is itself encoded as two
   hex digits.

   See the lex source for the meanings of the {TOKENS} named above.
*/
namespace {
        // Scratch state shared by the rule actions below. It is file-local,
        // so all HexTreeBuilder instances share it.
        unsigned long node_depth = 0;
        unsigned int loops = 0;      // number of chars the current read loop must fetch
        std::string word;            // hex text most recently read by READWORD
        std::string propname;
        std::string propval;
        std::string nodename;
        std::string nodeclass;
        bool in_prop = false;
        unsigned int decval = 0;     // numeric value of 'word'
        unsigned int lcv = 0;        // loop control variable of the read loops
        s11n::node_builder * serbuilder = 0; // builder fed by the actions; set in reset()

        /**
           Returns true if ch, as handed back by yyinput(), marks the end
           of the input. Some flex versions report end-of-file as EOF,
           others as 0, so both are accepted.
        */
        inline bool at_eof( int ch ) { return 0 == ch || EOF == ch; }
}

namespace s11n {

        /**
           Returns this builder's lexer, creating it (via reset()) on
           first use.
        */
        FlexLexer * HexTreeBuilder::lexer()
        {
                FlexLexer * fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        /**
           Resets the base class, installs a fresh hexFlexLexer and points
           the file-local serbuilder at this object's builder.
        */
        void HexTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new hexFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }

} // namespace s11n

/*
  READWORD(SZ): reads SZ characters of hex text into 'word' and stores
  their numeric value in 'decval'. On end of input the enclosing yylex()
  silently returns 0; a decoded value of 0 is reported as an error and
  also makes yylex() return 0. Only usable inside rule actions.

  NOTE(review): because 0 is treated as an error, empty names/values (size
  00) and NUL characters cannot be read back. Presumably hex2int() returns
  0 for unparsable input, which is why the check exists - confirm before
  relaxing it.
*/
#define READWORD(SZ) do { \
        word = ""; \
        for( int rw_i = 0; rw_i < (SZ); ++rw_i ) \
        { \
                const int rw_ch = yyinput(); \
                if( at_eof( rw_ch ) ) { word = ""; return 0; } \
                word += static_cast<char>( rw_ch ); \
        } \
        decval = toolbox::hex2int(word); \
        if( 0 == decval ) { PCERR << "Error reading word (size="<<(SZ)<<"). Maybe reached end of input?" << endl; return 0; } \
} while( 0 )

/*
  READHEXBYTES(DEST,COUNT): replaces the string DEST with COUNT characters,
  each decoded from two hex digits of input. Only usable inside rule
  actions (it relies on READWORD).
*/
#define READHEXBYTES(DEST,COUNT) do { \
        (DEST) = ""; \
        loops = (COUNT); \
        for( lcv = 0; lcv < loops; lcv++ ) \
        { \
                READWORD(2); \
                (DEST) += static_cast<char>( decval ); \
        } \
} while( 0 )

%}

HEX_DIGIT ([a-fA-F0-9])
WORD2 {HEX_DIGIT}{2}

    // maintenance note: these hex codes must be kept in sync with those from HexSerializer's enum
NODE_OPEN 11
NODE_CLOSE 10
PROP_OPEN 21
COOKIE 51190001
DATA_END 51190000

%%

\n {;}
{COOKIE} {;}
{DATA_END} { return 0; }

{NODE_OPEN} {
        //COUT << "Opening node." << std::endl;
        READWORD(2); // read node name size
        READHEXBYTES( nodename, decval ); // read node name
        READWORD(2); // get class name size
        READHEXBYTES( nodeclass, decval ); // read class name
        if( ! serbuilder->open_node( nodeclass, nodename ) )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
}

{NODE_CLOSE} {
        //COUT << "Closing node." << std::endl;
        serbuilder->close_node();
        if( 0 == serbuilder->node_depth() )
        { // stop once we close the first top-level node.
                return 0;
        }
}

{PROP_OPEN} {
        //COUTL( "Opening property" );
        READWORD(2); // prop name size
        //COUT << "name size=" <<word << " dec="<<decval<<std::endl;
        READHEXBYTES( propname, decval ); // read property name
        READWORD(8); // get value size
        READHEXBYTES( propval, decval ); // read property's value
        serbuilder->add_property( propname, propval );
}

{WORD2}|. { // catch-all. ('.' must not be bracketed: [.] would only match a literal dot.)
        PCERR<< "unexpected token: " << YYText()<<std::endl;
        return 0;
}

%%

#if HEX_DO_MAIN
#include <s11n/s11n_io.h> // HexSerializer
// #include <s11n/FlexShell.h>
// #include <s11n/ELib.h>
using namespace s11n;
// Test driver: runs the lexer until yylex() returns 0, then dumps the
// resulting tree to stdout in the "parens" format.
int main( int argc, char ** argv )
{
        s11n::HexTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
        // FlexLexer * lexer = new hexFlexLexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}
#endif

--- NEW FILE: paren.flex.at ---
%option c++
%{
/**
   LICENSE: Public Domain
   Author: stephan - sg...@us...

   This lexer reads in a lisp-like (but not lisp) grammar for the s11n
   framework. Its output partner is s11n::ParenSerializer.

   Sample:

   nodename=(ImplClassName
        (property_name property value)
        (prop2 value of \) prop2)
        another_node=(ns::ClassName)
   )

   nodename represents an s11n_node::name()
   ImplClassName represents the object's impl_class() value.

   Note that closing parens in your data must be backslash-escaped. This
   parser arguably strips all non-paired backslashes, so any actual
   backslashes must also be escaped (C-style). The ParenSerializer takes
   this into account and escapes its serialized data.
*/

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <cstdio> // EOF
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "paren.flex error:"

#include <toolbox/string_util.h> // trim_string()
// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>
#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

namespace {
        // Scratch state shared by the rule actions below. It is file-local,
        // so all ParenTreeBuilder instances share it.
        unsigned long node_depth = 0; // nodes opened minus nodes closed by this lexer
        std::string tmpstr;
        std::string nodename;
        std::string nodeclass;
        bool in_prop = false;
        s11n::node_builder * serbuilder = 0; // builder fed by the actions; set in reset()

        /**
           Returns true if ch, as handed back by yyinput(), marks the end
           of the input. Some flex versions report end-of-file as EOF,
           others as 0, so both are accepted.
        */
        inline bool at_eof( int ch ) { return 0 == ch || EOF == ch; }
}

namespace s11n {

        /**
           Returns this builder's lexer, creating it (via reset()) on
           first use.
        */
        FlexLexer * ParenTreeBuilder::lexer()
        {
                FlexLexer * fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        /**
           Resets the base class, installs a fresh parenFlexLexer and
           points the file-local serbuilder at this object's builder.
        */
        void ParenTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new parenFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }

} // namespace s11n

/***** *****/
%}

@COMMON_DEFINITIONS@

%x OPEN_CLASS
%x IN_PROPERTY
OPENER \(
CLOSER \)
NODENAME {VARNAME}
PROPERTY {VARNAME}

%%

{SPACE}*[#;].*$ {;} // comment lines

"(*" { // (* comment blocks *)
        // Code mostly taken from the flex info pages.
        // yyinput() is read into an int so that end of input can be told
        // apart from real characters.
        int c;
        while( ! at_eof( c = yyinput() ) )
        {
                if( c == '*' )
                {
                        c = yyinput();
                        if( at_eof( c ) )
                        {
                                PCERR << "hit EOF in a (*comment block*)." << std::endl;
                                return 0;
                        }
                        if( ')' == c ) break; //???
                        else unput(c);
                }
        }
        return 1;
}

{OPENER}{SPACE}* { BEGIN IN_PROPERTY; }

<IN_PROPERTY>({VARNAME}|{NUMBER})({SPACE})* {
        // key name of property
        std::string propname = YYText();
        // strip leading/trailing spaces from the property name. The pattern
        // guarantees at least one non-space character, so both positions
        // are valid:
        static const std::string avoid = " \t\n";
        const std::string::size_type head = propname.find_first_not_of( avoid );
        const std::string::size_type tail = propname.find_last_not_of( avoid );
        propname = propname.substr( head, tail - head + 1 );
        //COUT << "property ["<<propname<<"] = ";

        // Now we consider all data until a non-escaped closing brace
        // to be the value of the property...
        std::string propval = "";
        int c = yyinput();
        bool escaped = false;
        while( ! at_eof( c ) )
        {
                if( (!escaped) && '\\' == c )
                { // next char will be considered escaped, and this slash is stripped.
                        escaped = true;
                        c = yyinput();
                        if( ')' != c ) propval += '\\';
                        // ^^^^ put back the slash. We only want to strip escapes from closing parens.
                        continue;
                }
                if ( !escaped && ')' == c )
                {
                        // Look for a non-escaped paren to close
                        // us. Ideally we would count the
                        // opened/closed "data parens", but this would
                        // quickly get complicated and would break
                        // with certain data sets.
                        break;
                }
                escaped = false;
                propval += static_cast<char>( c );
                c = yyinput();
                //COUT << "["<<c<<"]"<<endl;
        }
        if( at_eof( c ) )
        {
                PCERR << "Reached EOF while reading value for property '"<<propname<<"'. This shouldn't happen." << std::endl;
                return 0;
        }
        //std::cout << "["<<propval<<"]"<<std::endl;
        serbuilder->add_property( propname, propval );
        BEGIN 0;
}

{NODENAME}={OPENER} {
        tmpstr = YYText();
        nodename = tmpstr.substr( 0, tmpstr.find( "=" ) );
        //COUT << "node name="<<nodename<<std::endl;
        BEGIN OPEN_CLASS;
}

<OPEN_CLASS>{SPACE}+ {;}

<OPEN_CLASS>{CLOSER} { // special case: empty node: nodename=()
        // Nothing was opened for this node, so node_depth is left alone.
        //COUT << "ignoring empty node " << nodename << std::endl;
        BEGIN 0;
}

<OPEN_CLASS>({SPACE}|\n)+ {;} // {SPACE} is not expanded inside [...], hence the group

<OPEN_CLASS>{CLASSNAME} {
        nodeclass = std::string(YYText());
        if( nodeclass.empty() )
        {
                PCERR << "nodes may not have empty names!" <<std::endl;
                return 0;
        }
        //COUT << "opening '"<<nodename<<"', class=" << nodeclass << std::endl;
        ++node_depth;
        if( ! serbuilder->open_node( nodeclass, nodename ) )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
        BEGIN 0;
}

<OPEN_CLASS>(.|\n) { // anything else. ([.\n] would only match a literal dot or a newline.)
        PCERR << "did not find class name after '"<<nodename<<"=(" << std::endl;
        return 0;
}

{CLOSER} {
        //COUT << node_depth<< " Closing node" << std::endl;
        serbuilder->close_node();
        if( node_depth > 0 ) --node_depth; // unsigned: don't wrap on a stray ')'
        if( 0 == serbuilder->node_depth() )
        { // stop once we close the first top-level node.
                return 0;
        }
}

^"(s11n::parens)" {;} // magic cookie
({SPACE})|\n {;}
. {;} // [.]+ { PCERR << "Unexpected token: [" << YYText()<<"]"<<std::endl;}

%%
// ^{SPACE}*[#;].*$ {;} // comment lines
// [#;][^{OPENER}]* {;} // comments trailing after node closures

#if PAREN_DO_MAIN
#include <s11n/s11n_io.h> // ParenSerializer
// Test driver: runs the lexer until yylex() returns 0, then dumps the
// resulting tree to stdout in the "parens" format.
int main( int argc, char ** argv )
{
        s11n::ParenTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}
#endif
|