Re: [Flex-devel] v2.5.35 unicode diff
flex is a tool for generating scanners
Brought to you by:
wlestes
From: Will E. <wes...@gm...> - 2012-03-11 14:39:58
|
Paul, Thanks for posting this patch. As you know, Unicode support is not a trivial change, so we'll be evaluating this to make sure it's what we want for flex. Any and all of your ideas, suggestions and comments on this patch are welcome. --Will On Sunday, 11 March 2012, 10:24 am -0400, Paul <pa...@pr...> wrote: > Attached is the diff of flex-2.5.35 to flex-2.5.35.U > The flag -U has been added to enable Unicode 16, otherwise it > behaves as flex 2.5.35 > To enter a Unicode character in a rule use \x0000. i.e. \x and > exactly 4 hex digits. > An example of a rule is: > ID ([a-zA-Z\x391-\x3a9\x3b1-\x3c9][a-zA-Z0-9\x391-\x3a9\x3b1-\x3c9]*) > Which is a-Z, 0-9, and the Greek upper & lower case Unicode letters. > For licenses, whatever covers flex2.5.35, covers this as well. > I have only tested this with Kubuntu 11.10. > Much thanks to the Unicode patch for flex-2.5.4a which was the basis > for this work. > > Cheers, > > Paul Neelands > diff flex-2.5.35/ccl.c flex-2.5.35.U/ccl.c > 83c83 > < ccltbl = reallocate_Character_array (ccltbl, > --- > > ccltbl = reallocate_wchar_array( ccltbl, > Only in flex-2.5.35.U: ccl.c~ > Only in flex-2.5.35: config.h > Only in flex-2.5.35: config.log > Only in flex-2.5.35: config.status > Only in flex-2.5.35: .deps > Common subdirectories: flex-2.5.35/doc and flex-2.5.35.U/doc > diff flex-2.5.35/ecs.c flex-2.5.35.U/ecs.c > 116c116 > < Char ccls[]; > --- > > wchar_t ccls[]; > Only in flex-2.5.35.U: ecs.c~ > Common subdirectories: flex-2.5.35/examples and flex-2.5.35.U/examples > diff flex-2.5.35/flexdef.h flex-2.5.35.U/flexdef.h > 108,109c108,109 > < /* Always be prepared to generate an 8-bit scanner. */ > < #define CSIZE 256 > --- > > /* Always be prepared to generate a 16-bit scanner. 
*/ > > #define CSIZE 65536 > 648c648 > < extern Char *ccltbl; > --- > > extern wchar_t *ccltbl; > 678a679,684 > > #define allocate_wchar_array(size) \ > > (wchar_t *) allocate_array( size, sizeof( wchar_t ) ) > > > > #define reallocate_wchar_array(array,size) \ > > (wchar_t *) reallocate_array( (void *) array, size, sizeof( wchar_t ) ) > > > 778c784 > < extern void mkeccl PROTO ((Char[], int, int[], int[], int, int)); > --- > > extern void mkeccl PROTO ((wchar_t[], int, int[], int[], int, int)); > 866c872 > < extern void cshell PROTO ((Char[], int, int)); > --- > > extern void cshell PROTO ((wchar_t[], int, int)); > 930c936 > < extern Char myesc PROTO ((Char[])); > --- > > extern int myesc PROTO ((Char[])); > Only in flex-2.5.35.U: flexdef.h~ > diff flex-2.5.35/FlexLexer.h flex-2.5.35.U/FlexLexer.h > 36a37,38 > > // Since this header is generic for all sizes of flex scanners, you must > > // define the type YY_CHAR before including it: > 39a42 > > // typedef xxx YY_CHAR; > 43a47 > > // typedef xxx YY_CHAR; > 65c69 > < const char* YYText() const { return yytext; } > --- > > const YY_CHAR* YYText() const { return yytext; } > 95c99 > < char* yytext; > --- > > YY_CHAR* yytext; > 133,134c137,138 > < virtual int LexerInput( char* buf, int max_size ); > < virtual void LexerOutput( const char* buf, int size ); > --- > > virtual int LexerInput( YY_CHAR* buf, int max_size ); > > virtual void LexerOutput( const YY_CHAR* buf, int size ); > 137c141 > < void yyunput( int c, char* buf_ptr ); > --- > > void yyunput( int c, YY_CHAR* buf_ptr ); > 160c164 > < char yy_hold_char; > --- > > YY_CHAR yy_hold_char; > 166c170 > < char* yy_c_buf_p; > --- > > YY_CHAR* yy_c_buf_p; > 185c189 > < char* yy_last_accepting_cpos; > --- > > YY_CHAR* yy_last_accepting_cpos; > 190c194 > < char* yy_full_match; > --- > > YY_CHAR* yy_full_match; > Only in flex-2.5.35.U: FlexLexer.h~ > diff flex-2.5.35/flex.skl flex-2.5.35.U/flex.skl > 126c126 > < M4_GEN_PREFIX(`_scan_bytes') > --- > > 
M4_GEN_PREFIX(`_scan_chars') > 274a275 > > *out for U pn > 276c277 > < #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) > --- > > /* #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) pn*/ > 543,544c544,545 > < char *yy_ch_buf; /* input buffer */ > < char *yy_buf_pos; /* current position in input buffer */ > --- > > YY_CHAR *yy_ch_buf; /* input buffer */ > > YY_CHAR *yy_buf_pos; /* current position in input buffer */ > 546c547 > < /* Size of input buffer in bytes, not including room for EOB > --- > > /* Size of input buffer in chars, not including room for EOB > 642c643 > < static char yy_hold_char; > --- > > static YY_CHAR yy_hold_char; > 647c648 > < static char *yy_c_buf_p = (char *) 0; > --- > > static YY_CHAR *yy_c_buf_p = (char *) 0; > 678,680c679,684 > < YY_BUFFER_STATE yy_scan_buffer M4_YY_PARAMS( char *base, yy_size_t size M4_YY_PROTO_LAST_ARG ); > < YY_BUFFER_STATE yy_scan_string M4_YY_PARAMS( yyconst char *yy_str M4_YY_PROTO_LAST_ARG ); > < YY_BUFFER_STATE yy_scan_bytes M4_YY_PARAMS( yyconst char *bytes, int len M4_YY_PROTO_LAST_ARG ); > --- > > YY_BUFFER_STATE yy_scan_buffer M4_YY_PARAMS( YY_CHAR *base, yy_size_t size M4_YY_PROTO_LAST_ARG ); > > YY_BUFFER_STATE yy_scan_string M4_YY_PARAMS( yyconst YY_CHAR *yy_str M4_YY_PROTO_LAST_ARG ); > > /* This is the old yy_scan_bytes function - renamed to avoid > > * confusion since a character may now be 1 or 2 bytes. 
> > */ > > YY_BUFFER_STATE yy_scan_chars M4_YY_PARAMS( yyconst YY_CHAR *chars, int len M4_YY_PROTO_LAST_ARG ); > 747c751 > < *yy_cp = '\0'; \ > --- > > *yy_cp = (YY_CHAR) '\0'; \ > 805c809 > < char yy_hold_char; > --- > > YY_CHAR yy_hold_char; > 808c812 > < char *yy_c_buf_p; > --- > > YY_CHAR *yy_c_buf_p; > 816c820 > < char* yy_last_accepting_cpos; > --- > > YY_CHAR* yy_last_accepting_cpos; > 825c829 > < char *yy_full_match; > --- > > YY_CHAR *yy_full_match; > 837,838c841,842 > < char yytext_r[YYLMAX]; > < char *yytext_ptr; > --- > > YY_CHAR yytext_r[YYLMAX]; > > YY_CHAR *yytext_ptr; > 843c847 > < char *yytext_r; > --- > > YY_CHAR *yytext_r; > 999c1003 > < static void yyunput M4_YY_PARAMS( int c, char *buf_ptr M4_YY_PROTO_LAST_ARG); > --- > > static void yyunput M4_YY_PARAMS( int c, (YY_CHAR) *buf_ptr M4_YY_PROTO_LAST_ARG); > 1005c1009 > < static void yy_flex_strncpy M4_YY_PARAMS( char *, yyconst char *, int M4_YY_PROTO_LAST_ARG); > --- > > static void yy_flex_strncpy M4_YY_PARAMS( (YY_CHAR) *, yyconst char *, int M4_YY_PROTO_LAST_ARG); > 1009c1013 > < static int yy_flex_strlen M4_YY_PARAMS( yyconst char * M4_YY_PROTO_LAST_ARG); > --- > > static int yy_flex_strlen M4_YY_PARAMS( yyconst (YY_CHAR) * M4_YY_PROTO_LAST_ARG); > 1077c1081 > < #define ECHO fwrite( yytext, yyleng, 1, yyout ) > --- > > #define ECHO (void) fwrite( yytext, sizeof( YY_CHAR ), yyleng, yyout ) > 1095c1099 > < if ( (result = LexerInput( (char *) buf, max_size )) < 0 ) \ > --- > > if ( (result = LexerInput( buf, max_size )) < 0 ) \ > 1239c1243 > < register char *yy_cp, *yy_bp; > --- > > register YY_CHAR *yy_cp, *yy_bp; > 1535c1539 > < int yyFlexLexer::LexerInput( char* buf, int /* max_size */ ) > --- > > int yyFlexLexer::LexerInput( YY_CHAR* buf, int /* max_size */ ) > 1537c1541 > < int yyFlexLexer::LexerInput( char* buf, int max_size ) > --- > > int yyFlexLexer::LexerInput( YY_CHAR* buf, int max_size ) > 1544c1548 > < yyin->get( buf[0] ); > --- > > (void) yyin->read((unsigned char *) buf, sizeof( 
YY_CHAR ) ); > 1555c1559 > < (void) yyin->read( buf, max_size ); > --- > > (void) yyin->read((unsigned char *) buf, max_size * sizeof( YY_CHAR ) ); > 1560c1564 > < return yyin->gcount(); > --- > > return ( yyin->gcount() / sizeof( YY_CHAR ) ); > 1564c1568 > < void yyFlexLexer::LexerOutput( const char* buf, int size ) > --- > > void yyFlexLexer::LexerOutput( const YY_CHAR* buf, int size ) > 1566c1570 > < (void) yyout->write( buf, size ); > --- > > (void) yyout->write((unsigned char *) buf, size * sizeof( YY_CHAR ) ); > 1588,1589c1592,1593 > < register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; > < register char *source = YY_G(yytext_ptr); > --- > > register YY_CHAR *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; > > register YY_CHAR *source = YY_G(yytext_ptr); > 1658c1662 > < b->yy_ch_buf = (char *) > --- > > b->yy_ch_buf = (YY_CHAR *) > 1661c1665 > < b->yy_buf_size + 2 M4_YY_CALL_LAST_ARG ); > --- > > (b->yy_buf_size + 2)*sizeof( YY_CHAR ) M4_YY_CALL_LAST_ARG ); > 1737c1741 > < register char *yy_cp; > --- > > register YY_CHAR *yy_cp; > 1774c1778 > < static void yyunput YYFARGS2( int,c, register char *,yy_bp) > --- > > static void yyunput YYFARGS2( int,c, register YY_CHAR *,yy_bp) > 1777c1781 > < void yyFlexLexer::yyunput( int c, register char* yy_bp) > --- > > void yyFlexLexer::yyunput( int c, register YY_CHAR* yy_bp) > 1780c1784 > < register char *yy_cp; > --- > > register YY_CHAR *yy_cp; > 1792c1796 > < register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ > --- > > register YY_CHAR *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ > 1794c1798 > < register char *source = > --- > > register YY_CHAR *source = > 1809c1813 > < *--yy_cp = (char) c; > --- > > *--yy_cp = (YY_CHAR) c; > 1853c1857 > < *YY_G(yy_c_buf_p) = '\0'; > --- > > *YY_G(yy_c_buf_p) = (YY_CHAR)'\0'; > 1900c1904 > < *YY_G(yy_c_buf_p) = '\0'; /* preserve yytext */ > --- > > *YY_G(yy_c_buf_p) = (YY_CHAR)'\0'; /* preserve yytext */ > 2016c2020 > < b->yy_ch_buf = (char *) yyalloc( b->yy_buf_size + 2 
M4_YY_CALL_LAST_ARG ); > --- > > b->yy_ch_buf = (YY_CHAR *) yyalloc( (b->yy_buf_size + 2)* sizeof( YY_CHAR ) M4_YY_CALL_LAST_ARG ); > 2292c2296 > < YY_BUFFER_STATE yy_scan_buffer YYFARGS2( char *,base, yy_size_t ,size) > --- > > YY_BUFFER_STATE yy_scan_buffer YYFARGS2( YY_CHAR *,base, yy_size_t ,size) > 2336c2340 > < YY_BUFFER_STATE yy_scan_string YYFARGS1( yyconst char *, yystr) > --- > > YY_BUFFER_STATE yy_scan_string YYFARGS1( yyconst YY_CHAR *, yystr) > 2338a2343,2345 > > int len; > > for ( len = 0; yy_str[len]; ++len ) > > ; > 2340c2347 > < return yy_scan_bytes( yystr, strlen(yystr) M4_YY_CALL_LAST_ARG); > --- > > return yy_scan_chars( yystr, len M4_YY_CALL_LAST_ARG); > 2356c2363 > < YY_BUFFER_STATE yy_scan_bytes YYFARGS2( yyconst char *,yybytes, int ,_yybytes_len) > --- > > YY_BUFFER_STATE yy_scan_chars YYFARGS2( yyconst YY_CHAR *,yychars, int ,_yybytes_len) > 2359c2366 > < char *buf; > --- > > YY_CHAR *buf; > 2365,2366c2372,2373 > < n = _yybytes_len + 2; > < buf = (char *) yyalloc( n M4_YY_CALL_LAST_ARG ); > --- > > n = _yychars_len + 2; > > buf = (YY_CHAR *) yyalloc( n sizeof( YY_CHAR ) M4_YY_CALL_LAST_ARG ); > 2370,2371c2377,2378 > < for ( i = 0; i < _yybytes_len; ++i ) > < buf[i] = yybytes[i]; > --- > > for ( i = 0; i < _yychars_len; ++i ) > > buf[i] = yychars[i]; > 2373c2380 > < buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; > --- > > buf[_yychars_len] = buf[_yychars_len+1] = YY_END_OF_BUFFER_CHAR; > 2377c2384 > < YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); > --- > > YY_FATAL_ERROR( "bad buffer in yy_scan_chars()" ); > 2462c2469 > < static void yy_fatal_error YYFARGS1(yyconst char*, msg) > --- > > static void yy_fatal_error YYFARGS1(yyconst YY_CHAR*, msg) > 2490c2497 > < *YY_G(yy_c_buf_p) = '\0'; \ > --- > > *YY_G(yy_c_buf_p) = (YY_CHAR) '\0'; \ > 2945c2952 > < static void yy_flex_strncpy YYFARGS3( char*,s1, yyconst char *,s2, int,n) > --- > > static void yy_flex_strncpy YYFARGS3( YY_CHAR*,s1, yyconst YY_CHAR *,s2, int,n) > 
2957c2964 > < static int yy_flex_strlen YYFARGS1( yyconst char *,s) > --- > > static int yy_flex_strlen YYFARGS1( yyconst YY_CHAR *,s) > Only in flex-2.5.35.U: flex.skl~ > diff flex-2.5.35/gen.c flex-2.5.35.U/gen.c > 941c941 > < indent_puts ("register char *yy_cp = YY_G(yy_c_buf_p);"); > --- > > indent_puts ("register YY_CHAR *yy_cp = YY_G(yy_c_buf_p);"); > 1690c1690 > < ("static char *yy_last_accepting_cpos;\n"); > --- > > ("static YY_CHAR *yy_last_accepting_cpos;\n"); > 1762c1762 > < outn ("static char *yy_full_match;"); > --- > > outn ("static YY_CHAR *yy_full_match;"); > 1857,1858c1857,1858 > < outn ("char yytext[YYLMAX];"); > < outn ("char *yytext_ptr;"); > --- > > outn ("YY_CHAR yytext[YYLMAX];"); > > outn ("YY_CHAR *yytext_ptr;"); > 1864c1864 > < outn ("char *yytext;"); > --- > > outn ("YY_CHAR *yytext;"); > 1877c1877 > < outn ("\twhile ( (result = read( fileno(yyin), (char *) buf, max_size )) < 0 ) \\"); > --- > > outn ("\twhile ( (result = read( fileno(yyin), (char *) buf, max_size* sizeof( YY_CHAR ) )) < 0 ) \\"); > 1895,1896c1895,1905 > < outn ("\t\t\t (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \\"); > < outn ("\t\t\tbuf[n] = (char) c; \\"); > --- > > > > > > if ( csize == 65536 ) > > outn( > > "\t\t\t (c = getwc( yyin )) != WEOF && c != '\\n'; ++n ) \\" ); > > else > > outn( > > "\t\t\t (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \\" ); > > > > outn( "\t\t\tbuf[n] = (YY_CHAR) c; \\" ); > > > 1898,1899c1907,1918 > < outn ("\t\t\tbuf[n++] = (char) c; \\"); > < outn ("\t\tif ( c == EOF && ferror( yyin ) ) \\"); > --- > > > > outn( "\t\t\tbuf[n++] = (YY_CHAR) c; \\" ); > > > > if ( csize == 65536 ) > > outn( > > "\t\tif ( c == WEOF && ferror( yyin ) ) \\" ); > > else > > outn( > > "\t\tif ( c == EOF && ferror( yyin ) ) \\" ); > > > > > > > 1902a1922 > > > 1906c1926 > < outn ("\t\twhile ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \\"); > --- > > outn ("\t\twhile ( (result = fread(buf, sizeof( YY_CHAR ), max_size, yyin))==0 && 
ferror(yyin)) \\"); > Only in flex-2.5.35.U: gen.c~ > Common subdirectories: flex-2.5.35/m4 and flex-2.5.35.U/m4 > diff flex-2.5.35/main.c flex-2.5.35.U/main.c > 96c96 > < Char *ccltbl; > --- > > wchar_t *ccltbl; > 265c265 > < csize = CSIZE; > --- > > csize = 256; > 306a307,326 > > if ( csize == 65536 ) > > { > > if ( fulltbl ) > > { > > if ( use_read ) > > flexerror( _( "Can't use -f with -U" ) ); > > else > > flexerror( _( "Can't use -Cf with -U" ) ); > > } > > else if ( fullspd ) > > { > > if ( use_read ) > > flexerror( _( "Can't use -F with -U" ) ); > > else > > flexerror( _( "Can't use -CF with -U" ) ); > > } > > else if ( ! useecs && ! usemecs ) > > flexerror( _( "Can't use -C with -U" ) ); > > } > > > 483a504,532 > > outn( "/* Define the YY_CHAR type. */" ); > > > > switch (csize) { > > case 65536: > > outn( "#include <wchar.h>" ); > > outn( "typedef unsigned short YY_CHAR;" ); > > break; > > case 256: > > outn( "typedef unsigned char YY_CHAR;" ); > > break; > > default: > > outn( "typedef char YY_CHAR;" ); > > break; > > } > > > > outn( "\n/* Promotes a YY_CHAR to an unsigned integer for use as an array index. 
*/"); > > > > switch (csize) { > > case 65536: > > case 256: > > outn( "#define YY_SC_TO_UI(c) ((unsigned int) (unsigned short) c)" ); > > break; > > default: > > outn( > > "#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)" ); > > break; > > } > > > > skelout(); > 789a839,840 > > else if ( csize == 256 ) > > putc( '8', stderr ); > 791c842 > < putc ('8', stderr); > --- > > putc( 'U', stderr ); > 1208c1259,1263 > < csize = CSIZE; > --- > > csize = 256; > > break; > > > > case OPT_UNICODE: > > csize = 65536; > 1589,1592c1644,1647 > < if (csize == 256) > < outn ("typedef unsigned char YY_CHAR;"); > < else > < outn ("typedef char YY_CHAR;"); > --- > > //if (csize == 256) > > // outn ("typedef unsigned char YY_CHAR;"); > > //else > > // outn ("typedef char YY_CHAR;"); > 1677c1732 > < outn ("extern char yytext[];\n"); > --- > > outn ("extern YY_CHAR yytext[];\n"); > 1684c1739 > < outn ("extern char *yytext;"); > --- > > outn ("extern YY_CHAR *yytext;"); > 1744c1799 > < ccltbl = allocate_Character_array (current_max_ccl_tbl_size); > --- > > ccltbl = allocate_wchar_array (current_max_ccl_tbl_size); > 1830c1885,1886 > < " -B, --batch generate batch scanner (opposite of -I)\n" > --- > > " -U, generate 16-bit scanner\n" > > " -B, --batch generate batch scanner (opposite of -I)\n" > Only in flex-2.5.35.U: main.c~ > Only in flex-2.5.35: Makefile > diff flex-2.5.35/misc.c flex-2.5.35.U/misc.c > 254,256c254,264 > < lerrsf (_ > < ("scanner requires -8 flag to use the character %s"), > < readable_form (c)); > --- > > { > > if ( c < 256 ) > > lerrsf( > > _( "scanner requires -8 flag to use the character %s" ), > > readable_form( c ) ); > > else > > lerrsf( > > _( "scanner requires -U flag to use the character %s" ), > > readable_form( c ) ); > > > > } > 336c344 > < Char v[]; > --- > > wchar_t v[]; > 340c348 > < Char k; > --- > > wchar_t k; > 615c623 > < Char myesc (array) > --- > > int myesc (array) > 618c626,627 > < Char c, esc_char; > --- > > Char c; > > unsigned int 
esc_char; > Only in flex-2.5.35.U: misc.c~ > diff flex-2.5.35/options.c flex-2.5.35.U/options.c > 200,201c200,201 > < {"-U", OPT_8BIT, 0} > < , /* Do not include unistd.h */ > --- > > {"-U", OPT_UNICODE, 0} > > , > Only in flex-2.5.35: options.c~ > diff flex-2.5.35/options.h flex-2.5.35.U/options.h > 44a45 > > OPT_UNICODE, > Common subdirectories: flex-2.5.35/po and flex-2.5.35.U/po > Only in flex-2.5.35: stamp-h1 > diff flex-2.5.35/tblcmp.c flex-2.5.35.U/tblcmp.c > 687c687 > < Char transset[CSIZE + 1]; > --- > > wchar_t transset[CSIZE + 1]; > Only in flex-2.5.35.U: tblcmp.c~ > Common subdirectories: flex-2.5.35/tests and flex-2.5.35.U/tests > Common subdirectories: flex-2.5.35/tools and flex-2.5.35.U/tools > ------------------------------------------------------------------------------ > Virtualization & Cloud Management Using Capacity Planning > Cloud computing makes use of virtualization - but cloud computing > also focuses on allowing computing to be delivered as a service. > http://www.accelacomm.com/jaw/sfnl/114/51521223/ > _______________________________________________ > Flex-devel mailing list > Fle...@li... > https://lists.sourceforge.net/lists/listinfo/flex-devel -- Will Estes (wl...@us...) Flex Project Maintainer http://flex.sourceforge.net/ |