[fU-cvs] libfunutil/lib/s11n/parsers common_flex_definitions.at,NONE,1.1 compact.flex.at,NONE,1.1 he

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/libfunutil/libfunutil/lib/s11n/parsers
In directory sc8-pr-cvs1:/tmp/cvs-serv24075/lib/s11n/parsers

Added Files:
	common_flex_definitions.at compact.flex.at hex.flex.at 
	paren.flex.at 
Log Message:
egg. apparently forgot them last night.

--- NEW FILE: common_flex_definitions.at ---
SPACE			([ \t])
NONSPACE		([^ \t])
	// SPACE is one space or tab (never a newline); NONSPACE is any other single character.
	// WORD: one or more alphanumerics or underscores.
WORD			([_[:alnum:]]+)
	// WORDS: two or more WORDs, each one followed by exactly one SPACE.
WORDS			(({WORD}{SPACE}){2,})
	// START_OF_LINE: beginning of a line plus any leading blanks.
START_OF_LINE		^({SPACE}*)

DIGIT			([0-9])
INTEGER			({DIGIT}+)

	// DOUBLE_QUOTED_STRING requires at least one character between the quotes,
	// while SINGLE_QUOTED_STRING also accepts an empty pair of quotes.
DOUBLE_QUOTED_STRING	([\"]([^\"]|(\\\"))+[\"])
SINGLE_QUOTED_STRING	([\'][^\']*[\'])
QUOTED_STRING		({SINGLE_QUOTED_STRING}|{DOUBLE_QUOTED_STRING})

	// QUOTED_STRING: does not yet handle escaped quotes-in-quotes. That probably
	// needs separate start states.

	// NUMBER_type1: optional sign, digits, optional trailing dot, optional exponent.
	// NUMBER_type2: optional sign, optional digits, a dot, digits, optional exponent.
NUMBER_type1		([-+]?{DIGIT}+\.?([eE][-+]?{DIGIT}+)?)
NUMBER_type2		([-+]?{DIGIT}*\.{DIGIT}+([eE][-+]?{DIGIT}+)?)
NUMBER			({NUMBER_type1}|{NUMBER_type2})

	// CLASSNAME: a WORD, optionally prefixed by one or more "WORD::" qualifiers.
CLASSNAME		(({WORD}\:\:)+)?{WORD}
ALMOST_A_WORD		[\._a-zA-Z0-9]+
	// VARNAME: a letter or underscore followed by letters, digits or underscores.
	// VARNAME_LENIENT additionally allows '.' and '-' after the first character,
	// or anything matching CLASSNAME.
VARNAME			([a-zA-Z_][_a-zA-Z0-9]*)
VARNAME_LENIENT		([a-zA-Z_][\.\-_a-zA-Z0-9]*|{CLASSNAME})

HEX_DIGIT		([a-fA-F0-9])
	// RGB_COLOR: '#' followed by exactly six hex digits.
RGB_COLOR		(#{HEX_DIGIT}{6})
	// SEMICOLON: one or more semicolons with optional blanks on either side.
SEMICOLON		({SPACE}*;+{SPACE}*)

        // ESCAPED_MULTILINE	((.*\$)/[^(\\\n)]*)
        // ESCAPED_MULTILINE	([.]+([^\\]\n$))
        // {ESCAPED_MULTILINE}|
        // PROPERTY_TYPES		({ESCAPED_MULTILINE})
	//UNTIL_SEMICOLON		.+\;{SPACE}*$
	//PROPERTY_VALUE		({NUMBER}|{ALMOST_A_WORD}|{QUOTED_STRING}|{WORD_WITH_PUNCTUATION}|{RGB_COLOR})
        // WORD_WITH_PUNCTUATION	[#.\!\?\-_a-zA-Z0-9]+
        //UNTIL_EOL		([.\n]+[^\\]$)

	// UNTIL_SEMICOLON: everything on the current line up to and including a ';'.
UNTIL_SEMICOLON		(.+;)

	//ESCAPED_LINES		((.+([\\]\n))+[^\\]\n)
	// PROPERTY_DEFINITION: a key (WORD or NUMBER) followed by either '=' with
	// optional blanks or by one or more blanks, and then a value matching
	// PROPERTY_DECL_RULES.
PROPERTY_DECL_RULES	({QUOTED_STRING}|{ALMOST_A_WORD}|{WORDS}|{RGB_COLOR})
PROP_DECL_EQUALS	(({WORD}|{NUMBER}){SPACE}*={SPACE}*)
PROP_DECL_SPACE		(({WORD}|{NUMBER}){SPACE}+)
PROPERTY_DECLS		({PROP_DECL_EQUALS}|{PROP_DECL_SPACE})
PROPERTY_DEFINITION	({PROPERTY_DECLS}{PROPERTY_DECL_RULES})

	// \<[^/][^\>]+\> { add_token( elib::efstring( "opening ",YYText() ) ); }
	// \<\/[^\>]+\> { add_token( elib::efstring( "closing ",YYText() ) ); }

--- NEW FILE: compact.flex.at ---
%option c++
%{
//
// LICENSE: Public Domain
// Author: stephan - sg...@us...
//

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "compact.flex error:"
// #include <toolbox/string_util.h> // trim_string()

// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>

#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>
#include <toolbox/string_util.h> // hex2int()

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/**
Basic grammar spec:

{NODE_OPEN}{NAME_SIZE}{NODE_NAME}<class_name_size>{CLASSNAME}
    ({PROP_OPEN}<key_size><key><value_size><value>)*
    (sub-nodes)*
{NODE_CLOSE}

See the lex source for the meanings of the {TOKENS} named above.
*/

namespace {
        // File-local scratch state shared by the rule actions below.

        // Builder receiving the parsed nodes; assigned in CompactTreeBuilder::reset().
        s11n::node_builder * serbuilder = 0;
        // Zeroed in CompactTreeBuilder::reset().
        unsigned long node_depth = 0;

        // READWORD() leaves the raw digits in 'word' and their value in 'decval'.
        std::string word;
        unsigned int decval = 0;

        // Loop bound and loop counter used while copying names and values.
        unsigned int loops = 0;
        unsigned int lcv = 0;

        // Accumulators for the node or property currently being read.
        std::string nodename;
        std::string nodeclass;
        std::string propname;
        std::string propval;
        bool in_prop = false;
}

namespace s11n {
        FlexLexer *
        CompactTreeBuilder::lexer()
        {
                FlexLexer * fp = 0;
                fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        void
        CompactTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new compactFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }
} // namespace s11n

namespace {
        // Most recent yyinput() result. Held as an int so that a negative
        // end-of-input value is not mistaken for a data byte.
        int inchar;
}
/*
  READWORD(SZ): reads SZ characters via yyinput() into 'word' and stores
  toolbox::hex2int(word) in 'decval'.

  If the input ends first (yyinput() yields 0 or a negative value), 'word'
  is cleared, an error is logged and the enclosing function returns 0.
  The macro must therefore only be used inside the scanner's actions.

  It is wrapped in do/while(0) so that "READWORD(n);" is a single statement.
*/
#define READWORD(SZ) \
        do { \
                word = ""; \
                for( int i = 0; i < (SZ); i++ ) \
                { \
                        inchar = yyinput(); \
                        if( inchar <= 0 ) {word=""; PCERR << "Reached EOF during READWORD!" << endl; return 0;} \
                        word += static_cast<char>( inchar ); \
                } \
                decval = toolbox::hex2int(word); \
        } while( 0 )

// 	if( 0 == decval ) { PCERR << "Error reading word of size " << SZ<<". Maybe reached end of input?" << endl; return 0; }

%}

HEX_DIGIT		([a-fA-F0-9])
	// WORD4: exactly four hex digits. No rule in this file references it.
WORD4			({HEX_DIGIT}{4})
	// Maintenance note: these hex codes must be kept in sync with those from HexSerializer's enum.
	// NOTE(review): this is the compact format; confirm which serializer's enum is meant here.
NODE_OPEN		f1
NODE_CLOSE		f0
PROP_OPEN		e1
COOKIE			51191001
DATA_END		51191000

%%

{COOKIE} {;} // magic cookie: consumed and ignored
{DATA_END} { return 0; } // end-of-data marker: stop scanning
[ \t\n] {;} // blanks and newlines between tokens are skipped

{NODE_OPEN} {
        // Layout after the marker: <2 hex digits: name length><name>
        // <2 hex digits: class-name length><class name>. The names
        // themselves are copied byte for byte from the input.
        READWORD(2); // node name size -> decval
        nodename = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                nodename += yyinput();
                ++lcv;
        }

        READWORD(2); // class name size -> decval
        nodeclass = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                nodeclass += (unsigned char) yyinput();
                ++lcv;
        }

        const bool opened = serbuilder->open_node( nodeclass, nodename );
        if( ! opened )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
        // Drop the scratch copies once the builder has them.
        nodeclass = "";
        nodename = "";
	}

{NODE_CLOSE} {
        // Close the current node. Once the builder reports depth 0 the
        // first top-level node is complete and scanning stops.
        serbuilder->close_node();
        const bool toplevel_closed = ( 0 == serbuilder->node_depth() );
        if( toplevel_closed ) return 0;
        continue;
	}
{PROP_OPEN} {
        // Layout after the marker: <2 hex digits: key length><key>
        // <8 hex digits: value length><value>. Key and value are copied
        // byte for byte from the input.
        propname = "";
        READWORD(2); // key size -> decval
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                propname += (unsigned char) yyinput();
                ++lcv;
        }

        READWORD(8); // value size -> decval
        propval = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                propval += (unsigned char) yyinput();
                ++lcv;
        }

        serbuilder->add_property( propname, propval );
        // Drop the scratch copies once the builder has them.
        propname = "";
        propval = "";
	}

. {
        // Catch-all: any character no earlier rule claims is a format error.
        // (A bracketed "[.]" would only match a literal dot, leaving every
        // other stray character to flex's default echo rule.)
        PCERR << "unexpected token: " << YYText() <<std::endl;
        return 0;
	}

%%

#if COMPACT_DO_MAIN
#include <s11n/s11n_io.h> // HexSerializer
// #include <s11n/FlexShell.h>
// #include <s11n/ELib.h>
using namespace s11n;
int main( int argc, char ** argv )
{
        s11n::CompactTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
//         FlexLexer * lexer = new compactFlexLexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
//                 s11n::CompactSerializer compact;
//                 compact.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}

#endif

--- NEW FILE: hex.flex.at ---
%option c++
%{
//
// LICENSE: Public Domain
// Author: stephan - sg...@us...
//

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "hex.flex error:"
// #include <toolbox/string_util.h> // trim_string()

// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>

#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>
#include <toolbox/string_util.h> // hex2int()

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

/**
Basic grammar spec for the "hexed" serialization format:

{NODE_OPEN}{NAME_SIZE}{NODE_NAME}<class_name_size>{CLASSNAME}
    ({PROP_OPEN}<key_size><key><value_size><value>)*
    (sub-nodes)*
{NODE_CLOSE}

See the lex source for the meanings of the {TOKENS} named above.
*/

namespace {
        // File-local scratch state shared by the rule actions below.

        // Builder receiving the parsed nodes; assigned in HexTreeBuilder::reset().
        s11n::node_builder * serbuilder = 0;
        // Zeroed in HexTreeBuilder::reset().
        unsigned long node_depth = 0;

        // READWORD() leaves the raw digits in 'word' and their value in 'decval'.
        std::string word;
        unsigned int decval = 0;

        // Loop bound and loop counter used while decoding names and values.
        unsigned int loops = 0;
        unsigned int lcv = 0;

        // Accumulators for the node or property currently being read.
        std::string nodename;
        std::string nodeclass;
        std::string propname;
        std::string propval;
        bool in_prop = false;
}

namespace s11n {
        FlexLexer *
        HexTreeBuilder::lexer()
        {
                FlexLexer * fp = 0;
                fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        void
        HexTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new hexFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }
} // namespace s11n

namespace {
        // Most recent yyinput() result. Held as an int so that a negative
        // end-of-input value is not mistaken for a data byte.
        int inchar;
}
/*
  READWORD(SZ): reads SZ characters via yyinput() into 'word' and stores
  toolbox::hex2int(word) in 'decval'.

  If the input ends first (yyinput() yields 0 or a negative value), 'word'
  is cleared and the enclosing function returns 0 without a message.
  A decoded value of 0 is reported as an error and also returns 0.
  NOTE(review): that zero check also rejects a legitimately encoded 0
  (e.g. an empty value) -- confirm against the serializer's output.

  It is wrapped in do/while(0) so that "READWORD(n);" is a single statement.
*/
#define READWORD(SZ) \
        do { \
                word = ""; \
                for( int i = 0; i < (SZ); i++ ) \
                { \
                        inchar = yyinput(); \
                        if( inchar <= 0 ) {word=""; return 0;} \
                        word += static_cast<char>( inchar ); \
                } \
                decval = toolbox::hex2int(word); \
                if( 0 == decval ) { PCERR << "Error reading word (size="<<(SZ)<<"). Maybe reached end of input?" << endl; return 0; } \
        } while( 0 )

%}

HEX_DIGIT		([a-fA-F0-9])
	// WORD2: exactly two hex digits; used by the final error rule below.
WORD2			{HEX_DIGIT}{2}
	// Maintenance note: these hex codes must be kept in sync with those from HexSerializer's enum.
NODE_OPEN		11
NODE_CLOSE		10
PROP_OPEN		21
COOKIE			51190001
DATA_END		51190000

%%

\n {;} // newlines between tokens are skipped

{COOKIE} {;} // magic cookie: consumed and ignored
{DATA_END} { return 0; } // end-of-data marker: stop scanning

{NODE_OPEN} {
        // Layout after the marker: <2 hex digits: name length>, then one
        // 2-digit hex word per name character; the class name follows in
        // the same form.
        READWORD(2); // node name size -> decval
        nodename = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                READWORD(2); // next character of the node name
                nodename += (unsigned char) decval;
                ++lcv;
        }

        READWORD(2); // class name size -> decval
        nodeclass = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                READWORD(2); // next character of the class name
                nodeclass += (unsigned char) decval;
                ++lcv;
        }

        const bool opened = serbuilder->open_node( nodeclass, nodename );
        if( ! opened )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
	}

{NODE_CLOSE} {
        // Close the current node. Once the builder reports depth 0 the
        // first top-level node is complete and scanning stops.
        serbuilder->close_node();
        const bool toplevel_closed = ( 0 == serbuilder->node_depth() );
        if( toplevel_closed ) return 0;
	}
{PROP_OPEN} {
        // Layout after the marker: <2 hex digits: key length>, one 2-digit
        // hex word per key character, <8 hex digits: value length>, then
        // one 2-digit hex word per value character.
        READWORD(2); // key size -> decval
        propname = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                READWORD(2); // next character of the property name
                propname += (unsigned char) decval;
                ++lcv;
        }

        READWORD(8); // value size -> decval
        propval = "";
        loops = decval;
        lcv = 0;
        while( lcv < loops )
        {
                READWORD(2); // next character of the property value
                propval += (unsigned char) decval;
                ++lcv;
        }

        serbuilder->add_property( propname, propval );
	}

{WORD2}|. {
        // Catch-all: a hex pair that is not one of the known markers, or any
        // other stray character, is a format error. (A bracketed "[.]" would
        // only match a literal dot.)
        PCERR<< "unexpected token: " << YYText()<<std::endl;
        return 0;
	}

%%

#if HEX_DO_MAIN
#include <s11n/s11n_io.h> // HexSerializer
// #include <s11n/FlexShell.h>
// #include <s11n/ELib.h>
using namespace s11n;
int main( int argc, char ** argv )
{
        s11n::HexTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
//         FlexLexer * lexer = new hexFlexLexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}

#endif

--- NEW FILE: paren.flex.at ---
%option c++
%{
/**
LICENSE: Public Domain
Author: stephan - sg...@us...

This lexer reads in a lisp-like (but not lisp) grammar for the s11n
framework. Its output partner is s11n::ParenSerializer.

Sample:

nodename=(ImplClassName (property_name property value) (prop2 value of \) prop2) 
	another_node=(ns::ClassName)
)

nodename represents an s11n_node::name(); ImplClassName represents the
object's impl_class() value.

Note that closing parens in your data must be backslash-escaped. This
parser arguably strips all non-paired backslashes, so any actual
backslashes must also be escaped (C-style). The ParenSerializer takes
this into account and escapes its serialized data.

*/

#define YY_SKIP_YYWRAP 1
int yywrap() { return 1; }

// #include <stdio.h>
#include <cassert>
#include <iostream>
#include <string>
#include <deque>

#include <s11n/s11n-macros.h> // COUT/CERR
#define PCERR CERR << "paren.flex error:"
#include <toolbox/string_util.h> // trim_string()

// #include <toolbox/PropertyStore.h>
// #include <toolbox/ClassLoader.h>
// #include <toolbox/Instantiator.h>
// #include <toolbox/KeyValueParser.h>

#include <s11n/node_builder.h>
#include <s11n/flex_lexers.h>

using std::cin;
using std::cerr;
using std::cout;
using std::endl;

namespace {
        // File-local scratch state shared by the rule actions below.

        // Builder receiving the parsed nodes; assigned in ParenTreeBuilder::reset().
        s11n::node_builder * serbuilder = 0;
        // Local nesting counter maintained by the node rules; zeroed in reset().
        unsigned long node_depth = 0;

        // Scratch buffer for the matched "name=(" text.
        std::string tmpstr;
        // Name and class of the node currently being opened.
        std::string nodename;
        std::string nodeclass;
        bool in_prop = false;
}

namespace s11n {
        FlexLexer *
        ParenTreeBuilder::lexer()
        {
                FlexLexer * fp = 0;
                fp = this->FlexTreeBuilder::lexer();
                if( fp ) return fp;
                // else first-time setup:
                this->reset();
                return this->FlexTreeBuilder::lexer();
        }

        void
        ParenTreeBuilder::reset()
        {
                this->FlexTreeBuilder::reset();
                FlexLexer * foo = new parenFlexLexer();
                this->FlexTreeBuilder::lexer( foo );
                serbuilder = this->builder();
                node_depth = 0;
        }
} // namespace s11n

/*****
*****/

%}

@COMMON_DEFINITIONS@

%x OPEN_CLASS
	// OPEN_CLASS: exclusive state entered after "nodename=(", while the class name is expected.
%x IN_PROPERTY
	// IN_PROPERTY: exclusive state entered after a bare "(", while a property key and value are read.
OPENER		\(
CLOSER		\)
	// VARNAME is not defined in this file; presumably it arrives via the @COMMON_DEFINITIONS@ substitution.
	// PROPERTY is not referenced by any rule in this file.
NODENAME 	{VARNAME}
PROPERTY	{VARNAME}

%%

{SPACE}*[#;].*$ {;} // comment lines

"(*" { // (* comment blocks *)
        // Modelled on the comment-eating example in the flex manual.
        // Consumes input up to and including the closing "*)".
        // Returns 1 when the block was closed; logs an error and
        // returns 0 when the input ends first.
        int c = yyinput();
        bool closed = false;
        while( c > 0 ) // 0 or a negative value means end of input
        {
                if( '*' != c )
                {
                        c = yyinput();
                        continue;
                }
                // Swallow a whole run of stars so that "**)" still
                // terminates the block.
                do { c = yyinput(); } while( '*' == c );
                if( ')' == c )
                {
                        closed = true;
                        break;
                }
                // Otherwise c is ordinary comment text (or end of input)
                // and is handled by the next loop iteration.
        }
        if( ! closed )
        {
                PCERR << "hit EOF in a (*comment block*)." << std::endl;
                return 0;
        }
        return 1;
	}

{OPENER}{SPACE}* {
        // A bare opening paren starts a property: (key value...)
        BEGIN IN_PROPERTY;
	}

<IN_PROPERTY>({VARNAME}|{NUMBER})({SPACE})* { // key name of property
        // Reads one property. The matched text is the key; everything up
        // to the next unescaped ')' is read by hand as the value and both
        // are handed to the builder. Returns 0 (after logging) when the
        // input ends before the closing paren.
        std::string propname = YYText();

        // strip leading/trailing spaces from the property name:
        static const std::string avoid = " \t\n";
        std::string::size_type tail = propname.find_last_not_of( avoid );
        propname = propname.substr( propname.find_first_not_of( avoid ),
                                    (std::string::npos == tail) ? tail : (tail + 1) );

        // Now we consider all data until a non-escaped closing paren
        // to be the value of the property...
        std::string propval = "";
        // Keep the character as an int: squeezing it into an unsigned char
        // would turn a negative end-of-input value into 255 and the loop
        // below could never terminate on truncated input.
        int c = yyinput();
        bool escaped = false;
        while( c > 0 )
        {
                if( (!escaped) && '\\' == c )
                { // next char will be considered escaped
                        escaped = true;
                        c = yyinput();
                        // Only "\)" loses its backslash; every other
                        // escape sequence is passed through untouched.
                        if( ')' != c ) propval += '\\';
                        continue;
                }

                if ( !escaped && ')' == c )
                {
                        // A non-escaped paren closes the property. Parens
                        // inside the data are not counted; they must be
                        // escaped instead.
                        break;
                }
                escaped = false;
                propval += static_cast<char>( c );
                c = yyinput();
        }
        if( c <= 0 )
        {
                PCERR << "Reached EOF while reading value for property '"<<propname<<"'. This shouldn't happen." << std::endl;
                return 0;
        }
        serbuilder->add_property( propname, propval );
        BEGIN 0;
	}

{NODENAME}={OPENER} {
        // Matched "name=(": remember the part before '=' as the node name
        // and switch to OPEN_CLASS to read the class name.
        tmpstr = YYText();
        const std::string::size_type eqpos = tmpstr.find( "=" );
        nodename.assign( tmpstr, 0, eqpos );
        BEGIN OPEN_CLASS;
	}

<OPEN_CLASS>{SPACE}+ {;}

<OPEN_CLASS>{CLOSER} { // special case: empty node: nodename=()
        // No class name was seen, so no node was opened and node_depth was
        // never incremented for this name: there is nothing to undo here.
        BEGIN 0;
	}
<OPEN_CLASS>({SPACE}|\n)+ {;} // blanks and newlines before the class name ({NAME} is not expanded inside [...])

<OPEN_CLASS>{CLASSNAME} {
        // The class name completes "name=(Class": open the node in the
        // builder and return to the initial state. Returns 0 (after
        // logging) when the builder refuses the node.
        nodeclass = YYText();
        if( nodeclass.empty() )
        {
                PCERR << "nodes may not have empty names!" <<std::endl;
                return 0;
        }
        ++node_depth;
        const bool opened = serbuilder->open_node( nodeclass, nodename );
        if( ! opened )
        {
                PCERR<< "open_node("<<nodeclass<<","<<nodename<<") failed." << endl;
                return 0;
        }
        BEGIN 0;
	}

<OPEN_CLASS>(.|\n) {
        // Anything else after "name=(" is an error. (Inside brackets a dot
        // is a literal '.', so "[.\n]" only caught dots and newlines.)
        PCERR << "did not find class name after '"<<nodename<<"=(" << std::endl;
        return 0;
	}

{CLOSER} {
        // Close the current node. Once the builder reports depth 0 the
        // first top-level node is complete and scanning stops.
        serbuilder->close_node();
        --node_depth;
        const bool toplevel_closed = ( 0 == serbuilder->node_depth() );
        if( toplevel_closed ) return 0;
	}

^"(s11n::parens)" {;} // magic cookie at the start of a line: consumed and ignored

({SPACE})|\n {;} // blanks and newlines between tokens are skipped

. {;}   // any other character is silently dropped. Disabled alternative: [.]+ { PCERR << "Unexpected token: [" << YYText()<<"]"<<std::endl;}

%%
// ^{SPACE}*[#;].*$ {;} // comment lines
// [#;][^{OPENER}]* {;} // comments trailing after node closures

#if PAREN_DO_MAIN
#include <s11n/s11n_io.h> // ParenSerializer
int main( int argc, char ** argv )
{
        s11n::ParenTreeBuilder bob;
        FlexLexer * lexer = bob.lexer();
        while( 0 != (lexer->yylex() ) );
        if( bob.root_node() )
        {
                s11n::ParenSerializer ser;
                ser.serialize( *(bob.root_node()), std::cout );
        }
        return 0;
}

#endif

[fU-cvs] libfunutil/lib/s11n/parsers common_flex_definitions.at,NONE,1.1 compact.flex.at,NONE,1.1 he

[fU-cvs] libfunutil/lib/s11n/parsers common_flex_definitions.at,NONE,1.1 compact.flex.at,NONE,1.1 hex.flex.at,NONE,1.1 paren.flex.at,NONE,1.1