[Faxpp-devel] SF.net SVN: faxpp: [23] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
|
From: <jp...@us...> - 2007-08-20 22:46:45
|
Revision: 23
http://faxpp.svn.sourceforge.net/faxpp/?rev=23&view=rev
Author: jpcs
Date: 2007-08-20 15:46:46 -0700 (Mon, 20 Aug 2007)
Log Message:
-----------
Fixed a bug that was causing all strings to be copied.
Tweaked the UTF-8 tokenizer states so that Latin1 could also use them.
Fixed column counting, and fixed line counting when a "\r\n" sequence
is split across a buffer boundary.
Modified Paths:
--------------
trunk/faxpp/configure
trunk/faxpp/configure.in
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_tokenizer.c
Modified: trunk/faxpp/configure
===================================================================
--- trunk/faxpp/configure 2007-08-20 00:17:50 UTC (rev 22)
+++ trunk/faxpp/configure 2007-08-20 22:46:46 UTC (rev 23)
@@ -20600,7 +20600,97 @@
+{ echo "$as_me:$LINENO: checking for working memcmp" >&5
+echo $ECHO_N "checking for working memcmp... $ECHO_C" >&6; }
+if test "${ac_cv_func_memcmp_working+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_func_memcmp_working=no
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+ /* Some versions of memcmp are not 8-bit clean. */
+ char c0 = '\100', c1 = '\200', c2 = '\201';
+ if (memcmp(&c0, &c2, 1) >= 0 || memcmp(&c1, &c2, 1) >= 0)
+ return 1;
+
+ /* The Next x86 OpenStep bug shows up only when comparing 16 bytes
+ or more and with at least one buffer not starting on a 4-byte boundary.
+ William Lewis provided this test program. */
+ {
+ char foo[21];
+ char bar[21];
+ int i;
+ for (i = 0; i < 4; i++)
+ {
+ char *a = foo + i;
+ char *b = bar + i;
+ strcpy (a, "--------01111111");
+ strcpy (b, "--------10000000");
+ if (memcmp (a, b, 16) >= 0)
+ return 1;
+ }
+ return 0;
+ }
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_memcmp_working=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_func_memcmp_working=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_memcmp_working" >&5
+echo "${ECHO_T}$ac_cv_func_memcmp_working" >&6; }
+test $ac_cv_func_memcmp_working = no && case " $LIBOBJS " in
+ *" memcmp.$ac_objext "* ) ;;
+ *) LIBOBJS="$LIBOBJS memcmp.$ac_objext"
+ ;;
+esac
+
+
+
for ac_header in stdlib.h
do
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
Modified: trunk/faxpp/configure.in
===================================================================
--- trunk/faxpp/configure.in 2007-08-20 00:17:50 UTC (rev 22)
+++ trunk/faxpp/configure.in 2007-08-20 22:46:46 UTC (rev 23)
@@ -62,6 +62,7 @@
# Checks for library functions.
AC_FUNC_MALLOC
+AC_FUNC_MEMCMP
AC_FUNC_REALLOC
AC_CHECK_FUNCS([gettimeofday memmove memset strerror])
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2007-08-20 00:17:50 UTC (rev 22)
+++ trunk/faxpp/src/tokenizer_states.c 2007-08-20 22:46:46 UTC (rev 23)
@@ -655,7 +655,7 @@
/* Check if it really was a one byte char */ \
if(env->current_char >= 0x80) { \
/* Decode properly */ \
- env->char_len = FAXPP_utf8_decode(env->position, env->buffer_end, &env->current_char); \
+ env->char_len = (env)->decode(env->position, env->buffer_end, &env->current_char); \
switch((env)->char_len) { \
case TRANSCODE_PREMATURE_END_OF_BUFFER: \
return PREMATURE_END_OF_BUFFER; \
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2007-08-20 00:17:50 UTC (rev 22)
+++ trunk/faxpp/src/tokenizer_states.h 2007-08-20 22:46:46 UTC (rev 23)
@@ -280,12 +280,13 @@
if(err != 0) return err; \
} \
\
+ (env)->column += 1; \
(env)->position += (env)->char_len; \
}
#define token_start_position(env) \
{ \
- if((env)->encode) { \
+ if((env)->do_encode) { \
FAXPP_reset_buffer(&(env)->token_buffer); \
(env)->token.value.ptr = (env)->token_buffer.cursor; \
} else { \
@@ -353,25 +354,28 @@
}
#define LINE_ENDINGS \
- case '\n': \
case '\r': { \
Char32 next_char; \
- if((env)->current_char == '\r' && \
- (env)->decode((env)->position + (env)->char_len, (env)->buffer_end, &next_char) \
- != TRANSCODE_PREMATURE_END_OF_BUFFER && next_char == '\n') { \
- (env)->column += 1; \
- } else { \
- (env)->line += 1; \
- (env)->column = 0; \
+ if((env)->decode((env)->position + (env)->char_len, (env)->buffer_end, &next_char) \
+ == TRANSCODE_PREMATURE_END_OF_BUFFER) { \
+ if(!(env)->buffer_done) return PREMATURE_END_OF_BUFFER; \
+ goto LINE_ENDINGS_INC; \
} \
- }
+ else if(next_char != '\n') goto LINE_ENDINGS_INC; \
+ goto LINE_ENDINGS_END; \
+ } \
+ case '\n': \
+LINE_ENDINGS_INC: \
+ (env)->line += 1; \
+ (env)->column = (unsigned int)-1; \
+LINE_ENDINGS_END:
/*
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
*/
#define WHITESPACE \
LINE_ENDINGS \
- case '\t':\
+ case '\t': \
case ' '
#endif
Modified: trunk/faxpp/src/xml_tokenizer.c
===================================================================
--- trunk/faxpp/src/xml_tokenizer.c 2007-08-20 00:17:50 UTC (rev 22)
+++ trunk/faxpp/src/xml_tokenizer.c 2007-08-20 22:46:46 UTC (rev 23)
@@ -275,6 +275,8 @@
void
FAXPP_set_tokenizer_decode(FAXPP_Tokenizer *tokenizer, FAXPP_DecodeFunction decode)
{
+ tokenizer->do_encode = 1;
+
if(decode == FAXPP_utf16_native_decode ||
#ifdef WORDS_BIGENDIAN
decode == FAXPP_utf16_be_decode
@@ -299,6 +301,13 @@
tokenizer->start_element_name_state = utf8_start_element_name_state;
tokenizer->element_content_state = utf8_element_content_state;
}
+ else if(decode == FAXPP_iso_8859_1_decode) {
+ tokenizer->decode = FAXPP_iso_8859_1_decode;
+
+ // Latin1 can use the UTF-8 states, since the first 128 values are the same as UTF-8
+ tokenizer->start_element_name_state = utf8_start_element_name_state;
+ tokenizer->element_content_state = utf8_element_content_state;
+ }
else if(decode == FAXPP_ucs4_native_decode ||
#ifdef WORDS_BIGENDIAN
decode == FAXPP_ucs4_be_decode
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|