Thread: [Faxpp-devel] SF.net SVN: faxpp: [36] trunk/faxpp (Page 2)
Status: Beta
Brought to you by:
jpcs
|
From: <jp...@us...> - 2008-02-24 02:26:53
|
Revision: 36
http://faxpp.svn.sourceforge.net/faxpp/?rev=36&view=rev
Author: jpcs
Date: 2008-02-23 18:26:52 -0800 (Sat, 23 Feb 2008)
Log Message:
-----------
Added parsing of element declarations and attribute list declarations.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/error.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
Added Paths:
-----------
trunk/faxpp/src/attlistdecl.c
trunk/faxpp/src/elementdecl.c
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/Makefile.am 2008-02-24 02:26:52 UTC (rev 36)
@@ -32,7 +32,9 @@
src/xml_parser.h \
src/xml_tokenizer.c \
src/xml_tokenizer.h \
-src/doctype.c
+src/doctype.c \
+src/elementdecl.c \
+src/attlistdecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/Makefile.in 2008-02-24 02:26:52 UTC (rev 36)
@@ -63,7 +63,7 @@
am_libfaxpp_la_OBJECTS = buffer.lo cdata.lo char_classes.lo comment.lo \
element.lo error.lo event.lo pi.lo reference.lo token.lo \
tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \
- xml_tokenizer.lo doctype.lo
+ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo
libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS)
libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -252,7 +252,9 @@
src/xml_parser.h \
src/xml_tokenizer.c \
src/xml_tokenizer.h \
-src/doctype.c
+src/doctype.c \
+src/elementdecl.c \
+src/attlistdecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
@@ -374,12 +376,14 @@
distclean-compile:
-rm -f *.tab.c
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/attlistdecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/buffer.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdata.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_classes.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/comment.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parser_example.Po@am__quote@
@@ -527,6 +531,20 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o doctype.lo `test -f 'src/doctype.c' || echo '$(srcdir)/'`src/doctype.c
+elementdecl.lo: src/elementdecl.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT elementdecl.lo -MD -MP -MF $(DEPDIR)/elementdecl.Tpo -c -o elementdecl.lo `test -f 'src/elementdecl.c' || echo '$(srcdir)/'`src/elementdecl.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/elementdecl.Tpo $(DEPDIR)/elementdecl.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/elementdecl.c' object='elementdecl.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o elementdecl.lo `test -f 'src/elementdecl.c' || echo '$(srcdir)/'`src/elementdecl.c
+
+attlistdecl.lo: src/attlistdecl.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT attlistdecl.lo -MD -MP -MF $(DEPDIR)/attlistdecl.Tpo -c -o attlistdecl.lo `test -f 'src/attlistdecl.c' || echo '$(srcdir)/'`src/attlistdecl.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/attlistdecl.Tpo $(DEPDIR)/attlistdecl.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/attlistdecl.c' object='attlistdecl.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o attlistdecl.lo `test -f 'src/attlistdecl.c' || echo '$(srcdir)/'`src/attlistdecl.c
+
parser_example.o: examples/parser_example.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/include/faxpp/error.h 2008-02-24 02:26:52 UTC (rev 36)
@@ -52,6 +52,8 @@
INVALID_CHAR_IN_PUBID_LITERAL,
EXPECTING_PUBID_LITERAL,
INVALID_DTD_DECL,
+ INVALID_CHAR_IN_ELEMENTDECL_NAME,
+ INVALID_CHAR_IN_ATTLISTDECL_NAME,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/include/faxpp/token.h 2008-02-24 02:26:52 UTC (rev 36)
@@ -72,6 +72,14 @@
SYSTEM_LITERAL_TOKEN,
PUBID_LITERAL_TOKEN,
+ ELEMENTDECL_PREFIX_TOKEN,
+ ELEMENTDECL_NAME_TOKEN,
+ ELEMENTDECL_CONTENT_TOKEN,
+
+ ATTLISTDECL_PREFIX_TOKEN,
+ ATTLISTDECL_NAME_TOKEN,
+ ATTLISTDECL_CONTENT_TOKEN,
+
END_OF_BUFFER_TOKEN = 99
} FAXPP_TokenType;
Added: trunk/faxpp/src/attlistdecl.c
===================================================================
--- trunk/faxpp/src/attlistdecl.c (rev 0)
+++ trunk/faxpp/src/attlistdecl.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tokenizer_states.h"
+#include "char_classes.h"
+/*
+ * SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error)
+ *
+ * Defines a tokenizer state function called `name` that accepts exactly one
+ * expected character. It reads the current character and, when it equals
+ * `ch`, assigns `next_state` to env->state (and `next_stored_state` to
+ * env->stored_state when that argument is non-zero), consumes the character
+ * and returns NO_ERROR. A character that reaches the default branch is
+ * consumed and `error` is returned; the macro does not assign env->state on
+ * that path.
+ *
+ * LINE_ENDINGS is a macro defined outside this file; presumably it supplies
+ * the case labels that handle line endings - TODO confirm.
+ */
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS \
+ default: \
+ next_char(env); \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+/*
+ * One state per letter of "TTLIST" (the 'A' is matched by
+ * internal_subset_decl_state before attlistdecl_initial_state1 is entered).
+ * The last state stores attlistdecl_name_state1 in env->stored_state and
+ * switches to ws_plus_state.
+ */
+SINGLE_CHAR_STATE(attlistdecl_initial_state1, 'T', 0, attlistdecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state2, 'T', 0, attlistdecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state3, 'L', 0, attlistdecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state4, 'I', 0, attlistdecl_initial_state5, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state5, 'S', 0, attlistdecl_initial_state6, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state6, 'T', attlistdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+/*
+ * Consumes the first character of the name in an attribute list declaration.
+ * Always switches to attlistdecl_name_state2. Returns
+ * INVALID_CHAR_IN_ATTLISTDECL_NAME when the character's class flags do not
+ * intersect env->ncname_start_char, NO_ERROR otherwise.
+ */
+FAXPP_Error
+attlistdecl_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = attlistdecl_name_state2; /* assigned before validation, so it also holds on the error return */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/*
+ * Scans the remaining characters of the declaration's name (or of its
+ * prefix) in a loop until a terminator or an invalid character is seen.
+ *  - WHITESPACE ends the name: reports ATTLISTDECL_NAME_TOKEN and switches
+ *    to ws_state with attlistdecl_content_state as the stored state.
+ *  - '>' ends the name: reports ATTLISTDECL_NAME_TOKEN and switches to
+ *    attlistdecl_content_state without consuming the '>'.
+ *  - ':' ends a prefix: reports ATTLISTDECL_PREFIX_TOKEN and switches to
+ *    attlistdecl_name_seen_colon_state1.
+ *  - Any other character is consumed; INVALID_CHAR_IN_ATTLISTDECL_NAME is
+ *    returned when its class flags do not intersect env->ncname_char.
+ * WHITESPACE is a macro defined outside this file and is used here as a case
+ * label.
+ */
+FAXPP_Error
+attlistdecl_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_content_state;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '>':
+ env->state = attlistdecl_content_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NAME_TOKEN, env);
+ // no next char
+ token_start_position(env);
+ return NO_ERROR;
+ case ':':
+ env->state = attlistdecl_name_seen_colon_state1;
+ token_end_position(env);
+ report_token(ATTLISTDECL_PREFIX_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+ /* Not a terminator: consume the character, then check it may appear in a name. */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/*
+ * Consumes the first character that follows the ':' of a prefixed name in an
+ * attribute list declaration. Always switches to
+ * attlistdecl_name_seen_colon_state2. Returns
+ * INVALID_CHAR_IN_ATTLISTDECL_NAME when the character's class flags do not
+ * intersect env->ncname_start_char, NO_ERROR otherwise.
+ */
+FAXPP_Error
+attlistdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = attlistdecl_name_seen_colon_state2; /* assigned before validation, so it also holds on the error return */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/*
+ * Scans the rest of the name after its prefix. Behaves like
+ * attlistdecl_name_state2 except that there is no ':' case, so a second
+ * colon goes through the ordinary env->ncname_char check.
+ *  - WHITESPACE ends the name: reports ATTLISTDECL_NAME_TOKEN and switches
+ *    to ws_state with attlistdecl_content_state as the stored state.
+ *  - '>' ends the name: reports ATTLISTDECL_NAME_TOKEN and switches to
+ *    attlistdecl_content_state without consuming the '>'.
+ *  - Any other character is consumed; INVALID_CHAR_IN_ATTLISTDECL_NAME is
+ *    returned when its class flags do not intersect env->ncname_char.
+ */
+FAXPP_Error
+attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_content_state;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '>':
+ env->state = attlistdecl_content_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NAME_TOKEN, env);
+ // no next char
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+ /* Not a terminator: consume the character, then check it may appear in a name. */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/*
+ * Handles one character of the declaration body that follows the name.
+ * On '>' it calls base_state(env) and reports ATTLISTDECL_CONTENT_TOKEN;
+ * every other character is accepted without validation. The character is
+ * consumed in all cases and NO_ERROR is always returned.
+ *
+ * NOTE(review): unlike the name states, token_end_position() is not called
+ * before report_token() here - confirm the reported token extent is the
+ * intended one.
+ */
+FAXPP_Error
+attlistdecl_content_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '>':
+ base_state(env);
+ report_token(ATTLISTDECL_CONTENT_TOKEN, env);
+ break;
+ LINE_ENDINGS
+ default:
+ break;
+ }
+ next_char(env); /* reached from every branch, including '>' */
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/doctype.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -521,11 +521,10 @@
env->state = pi_name_start_state;
break;
case '!':
- env->state = comment_start_state1;
+ env->state = internal_subset_decl_state;
break;
LINE_ENDINGS
default:
-/* env->state = internal_subset_decl_state; */
next_char(env);
return INVALID_DTD_DECL;
}
@@ -536,6 +535,32 @@
}
FAXPP_Error
+internal_subset_decl_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+ /*
+ * Selects the next state from the character that follows a '!' in the
+ * internal subset (the preceding markup state switches here on '!').
+ * Recognised characters are consumed and NO_ERROR is returned.
+ */
+ switch(env->current_char) {
+ case '-': /* continue with the comment start states */
+ env->state = comment_start_state2;
+ break;
+ case 'E': /* the elementdecl states match the following "LEMENT" */
+ env->state = elementdecl_initial_state1;
+ break;
+ case 'A': /* the attlistdecl states match the following "TTLIST" */
+ env->state = attlistdecl_initial_state1;
+ break;
+ LINE_ENDINGS
+ default: /* NOTE(review): reports INVALID_START_OF_COMMENT and continues as comment content even though the input need not be a comment; the '!' dispatch above returns INVALID_DTD_DECL for unknown markup - confirm this is intended */
+ env->state = comment_content_state;
+ token_start_position(env);
+ next_char(env);
+ return INVALID_START_OF_COMMENT;
+ }
+ next_char(env); /* consume the dispatch character */
+ return NO_ERROR;
+}
+
+FAXPP_Error
doctype_end_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
Added: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c (rev 0)
+++ trunk/faxpp/src/elementdecl.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tokenizer_states.h"
+#include "char_classes.h"
+/*
+ * SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error)
+ *
+ * Defines a tokenizer state function called `name` that accepts exactly one
+ * expected character. It reads the current character and, when it equals
+ * `ch`, assigns `next_state` to env->state (and `next_stored_state` to
+ * env->stored_state when that argument is non-zero), consumes the character
+ * and returns NO_ERROR. A character that reaches the default branch is
+ * consumed and `error` is returned; the macro does not assign env->state on
+ * that path.
+ *
+ * LINE_ENDINGS is a macro defined outside this file; presumably it supplies
+ * the case labels that handle line endings - TODO confirm.
+ */
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS \
+ default: \
+ next_char(env); \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+/*
+ * One state per letter of "LEMENT" (the 'E' is matched by
+ * internal_subset_decl_state before elementdecl_initial_state1 is entered).
+ * The last state stores elementdecl_name_state1 in env->stored_state and
+ * switches to ws_plus_state.
+ */
+SINGLE_CHAR_STATE(elementdecl_initial_state1, 'L', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state2, 'E', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state3, 'M', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state4, 'E', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state5, 'N', 0, elementdecl_initial_state6, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state6, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+/*
+ * Consumes the first character of the name in an element declaration.
+ * Always switches to elementdecl_name_state2. Returns
+ * INVALID_CHAR_IN_ELEMENTDECL_NAME when the character's class flags do not
+ * intersect env->ncname_start_char, NO_ERROR otherwise.
+ */
+FAXPP_Error
+elementdecl_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_name_state2; /* assigned before validation, so it also holds on the error return */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/*
+ * Scans the remaining characters of the declaration's name (or of its
+ * prefix) in a loop until a terminator or an invalid character is seen.
+ *  - WHITESPACE ends the name: reports ELEMENTDECL_NAME_TOKEN and switches
+ *    to ws_state with elementdecl_content_state as the stored state.
+ *  - ':' ends a prefix: reports ELEMENTDECL_PREFIX_TOKEN and switches to
+ *    elementdecl_name_seen_colon_state1.
+ *  - Any other character is consumed; INVALID_CHAR_IN_ELEMENTDECL_NAME is
+ *    returned when its class flags do not intersect env->ncname_char.
+ * There is no '>' case here (attlistdecl_name_state2 has one), so a '>'
+ * directly after the name goes through the ordinary character check.
+ * WHITESPACE is a macro defined outside this file and is used here as a case
+ * label.
+ */
+FAXPP_Error
+elementdecl_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_content_state;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case ':':
+ env->state = elementdecl_name_seen_colon_state1;
+ token_end_position(env);
+ report_token(ELEMENTDECL_PREFIX_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+ /* Not a terminator: consume the character, then check it may appear in a name. */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/*
+ * Consumes the first character that follows the ':' of a prefixed name in an
+ * element declaration. Always switches to
+ * elementdecl_name_seen_colon_state2. Returns
+ * INVALID_CHAR_IN_ELEMENTDECL_NAME when the character's class flags do not
+ * intersect env->ncname_start_char, NO_ERROR otherwise.
+ */
+FAXPP_Error
+elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_name_seen_colon_state2; /* assigned before validation, so it also holds on the error return */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/*
+ * Scans the rest of the name after its prefix. Behaves like
+ * elementdecl_name_state2 except that there is no ':' case, so a second
+ * colon goes through the ordinary env->ncname_char check.
+ *  - WHITESPACE ends the name: reports ELEMENTDECL_NAME_TOKEN and switches
+ *    to ws_state with elementdecl_content_state as the stored state.
+ *  - Any other character is consumed; INVALID_CHAR_IN_ELEMENTDECL_NAME is
+ *    returned when its class flags do not intersect env->ncname_char.
+ */
+FAXPP_Error
+elementdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_content_state;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+ /* Not a terminator: consume the character, then check it may appear in a name. */
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/*
+ * Handles one character of the declaration body that follows the name.
+ * On '>' it calls base_state(env) and reports ELEMENTDECL_CONTENT_TOKEN;
+ * every other character is accepted without validation. The character is
+ * consumed in all cases and NO_ERROR is always returned.
+ *
+ * NOTE(review): unlike the name states, token_end_position() is not called
+ * before report_token() here - confirm the reported token extent is the
+ * intended one.
+ */
+FAXPP_Error
+elementdecl_content_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '>':
+ base_state(env);
+ report_token(ELEMENTDECL_CONTENT_TOKEN, env);
+ break;
+ LINE_ENDINGS
+ default:
+ break;
+ }
+ next_char(env); /* reached from every branch, including '>' */
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/error.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -99,6 +99,10 @@
return "EXPECTING_PUBID_LITERAL";
case INVALID_DTD_DECL:
return "INVALID_DTD_DECL";
+ case INVALID_CHAR_IN_ELEMENTDECL_NAME:
+ return "INVALID_CHAR_IN_ELEMENTDECL_NAME";
+ case INVALID_CHAR_IN_ATTLISTDECL_NAME:
+ return "INVALID_CHAR_IN_ATTLISTDECL_NAME";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/token.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -97,6 +97,20 @@
case PUBID_LITERAL_TOKEN:
return "PUBID_LITERAL_TOKEN";
+ case ELEMENTDECL_PREFIX_TOKEN:
+ return "ELEMENTDECL_PREFIX_TOKEN";
+ case ELEMENTDECL_NAME_TOKEN:
+ return "ELEMENTDECL_NAME_TOKEN";
+ case ELEMENTDECL_CONTENT_TOKEN:
+ return "ELEMENTDECL_CONTENT_TOKEN";
+
+ case ATTLISTDECL_PREFIX_TOKEN:
+ return "ATTLISTDECL_PREFIX_TOKEN";
+ case ATTLISTDECL_NAME_TOKEN:
+ return "ATTLISTDECL_NAME_TOKEN";
+ case ATTLISTDECL_CONTENT_TOKEN:
+ return "ATTLISTDECL_CONTENT_TOKEN";
+
case NO_TOKEN:
break;
}
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/tokenizer_states.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -417,6 +417,8 @@
return "internal_subset_state";
else if(state == internal_subset_markup_state)
return "internal_subset_markup_state";
+ else if(state == internal_subset_decl_state)
+ return "internal_subset_decl_state";
else if(state == doctype_end_state)
return "doctype_end_state";
@@ -458,6 +460,56 @@
else if(state == pubid_literal_quot_state)
return "pubid_literal_quot_state";
+ else if(state == elementdecl_initial_state1)
+ return "elementdecl_initial_state1";
+ else if(state == elementdecl_initial_state2)
+ return "elementdecl_initial_state2";
+ else if(state == elementdecl_initial_state3)
+ return "elementdecl_initial_state3";
+ else if(state == elementdecl_initial_state4)
+ return "elementdecl_initial_state4";
+ else if(state == elementdecl_initial_state5)
+ return "elementdecl_initial_state5";
+ else if(state == elementdecl_initial_state6)
+ return "elementdecl_initial_state6";
+ else if(state == elementdecl_name_state1)
+ return "elementdecl_name_state1";
+ else if(state == elementdecl_name_state2)
+ return "elementdecl_name_state2";
+ else if(state == elementdecl_name_seen_colon_state1)
+ return "elementdecl_name_seen_colon_state1";
+ else if(state == elementdecl_name_seen_colon_state2)
+ return "elementdecl_name_seen_colon_state2";
+ else if(state == elementdecl_content_state)
+ return "elementdecl_content_state";
+
+ else if(state == attlistdecl_initial_state1)
+ return "attlistdecl_initial_state1";
+ else if(state == attlistdecl_initial_state2)
+ return "attlistdecl_initial_state2";
+ else if(state == attlistdecl_initial_state3)
+ return "attlistdecl_initial_state3";
+ else if(state == attlistdecl_initial_state4)
+ return "attlistdecl_initial_state4";
+ else if(state == attlistdecl_initial_state5)
+ return "attlistdecl_initial_state5";
+ else if(state == attlistdecl_initial_state6)
+ return "attlistdecl_initial_state6";
+ else if(state == attlistdecl_name_state1)
+ return "attlistdecl_name_state1";
+ else if(state == attlistdecl_name_state2)
+ return "attlistdecl_name_state2";
+ else if(state == attlistdecl_name_seen_colon_state1)
+ return "attlistdecl_name_seen_colon_state1";
+ else if(state == attlistdecl_name_seen_colon_state2)
+ return "attlistdecl_name_seen_colon_state2";
+ else if(state == attlistdecl_content_state)
+ return "attlistdecl_content_state";
+
return "unknown";
}
#endif
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/tokenizer_states.h 2008-02-24 02:26:52 UTC (rev 36)
@@ -256,6 +256,7 @@
FAXPP_Error doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env);
FAXPP_Error internal_subset_state(FAXPP_TokenizerEnv *env);
FAXPP_Error internal_subset_markup_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error internal_subset_decl_state(FAXPP_TokenizerEnv *env);
FAXPP_Error doctype_end_state(FAXPP_TokenizerEnv *env);
FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env);
@@ -278,7 +279,31 @@
FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_initial_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_initial_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error attlistdecl_content_state(FAXPP_TokenizerEnv *env);
+
+
/*********************
*
* Tokenizer Helper Functions
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2008-01-10 12:26:27 UTC (rev 35)
+++ trunk/faxpp/src/xml_parser.c 2008-02-24 02:26:52 UTC (rev 36)
@@ -820,6 +820,12 @@
case SYSTEM_LITERAL_TOKEN:
case PUBID_LITERAL_TOKEN:
case PE_REFERENCE_TOKEN:
+ case ELEMENTDECL_PREFIX_TOKEN:
+ case ELEMENTDECL_NAME_TOKEN:
+ case ELEMENTDECL_CONTENT_TOKEN:
+ case ATTLISTDECL_PREFIX_TOKEN:
+ case ATTLISTDECL_NAME_TOKEN:
+ case ATTLISTDECL_CONTENT_TOKEN:
// TBD - jpcs
break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <jp...@us...> - 2008-02-25 12:05:15
|
Revision: 37
http://faxpp.svn.sourceforge.net/faxpp/?rev=37&view=rev
Author: jpcs
Date: 2008-02-25 04:05:13 -0800 (Mon, 25 Feb 2008)
Log Message:
-----------
Implemented tokenization of notation declarations.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/error.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
Added Paths:
-----------
trunk/faxpp/src/notationdecl.c
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37)
@@ -34,7 +34,8 @@
src/xml_tokenizer.h \
src/doctype.c \
src/elementdecl.c \
-src/attlistdecl.c
+src/attlistdecl.c \
+src/notationdecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37)
@@ -63,7 +63,8 @@
am_libfaxpp_la_OBJECTS = buffer.lo cdata.lo char_classes.lo comment.lo \
element.lo error.lo event.lo pi.lo reference.lo token.lo \
tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \
- xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo
+ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \
+ notationdecl.lo
libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS)
libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -254,7 +255,8 @@
src/xml_tokenizer.h \
src/doctype.c \
src/elementdecl.c \
-src/attlistdecl.c
+src/attlistdecl.c \
+src/notationdecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
@@ -386,6 +388,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parser_example.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reference.Plo@am__quote@
@@ -545,6 +548,13 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o attlistdecl.lo `test -f 'src/attlistdecl.c' || echo '$(srcdir)/'`src/attlistdecl.c
+notationdecl.lo: src/notationdecl.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT notationdecl.lo -MD -MP -MF $(DEPDIR)/notationdecl.Tpo -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/notationdecl.Tpo $(DEPDIR)/notationdecl.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/notationdecl.c' object='notationdecl.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c
+
parser_example.o: examples/parser_example.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37)
@@ -54,6 +54,7 @@
INVALID_DTD_DECL,
INVALID_CHAR_IN_ELEMENTDECL_NAME,
INVALID_CHAR_IN_ATTLISTDECL_NAME,
+ INVALID_CHAR_IN_NOTATIONDECL_NAME,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37)
@@ -80,6 +80,9 @@
ATTLISTDECL_NAME_TOKEN,
ATTLISTDECL_CONTENT_TOKEN,
+ NOTATIONDECL_NAME_TOKEN,
+ NOTATIONDECL_CONTENT_TOKEN,
+
END_OF_BUFFER_TOKEN = 99
} FAXPP_TokenType;
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -549,6 +549,9 @@
case 'A':
env->state = attlistdecl_initial_state1;
break;
+ case 'N':
+ env->state = notationdecl_initial_state1;
+ break;
LINE_ENDINGS
default:
env->state = comment_content_state;
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -103,6 +103,8 @@
return "INVALID_CHAR_IN_ELEMENTDECL_NAME";
case INVALID_CHAR_IN_ATTLISTDECL_NAME:
return "INVALID_CHAR_IN_ATTLISTDECL_NAME";
+ case INVALID_CHAR_IN_NOTATIONDECL_NAME:
+ return "INVALID_CHAR_IN_NOTATIONDECL_NAME";
case NO_ERROR:
break;
}
Added: trunk/faxpp/src/notationdecl.c
===================================================================
--- trunk/faxpp/src/notationdecl.c (rev 0)
+++ trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tokenizer_states.h"
+#include "char_classes.h"
+/* Generates a state function `name` that accepts exactly one character, `ch`; for any other character it calls next_char() and returns `error`. */
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); /* 0 leaves stored_state unchanged */ \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS /* macro defined elsewhere; presumably line-ending case labels - TODO confirm */ \
+ default: \
+ next_char(env); \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+/* Chain of single-character states matching 'O','T','A','T','I','O','N' in order; each mismatch returns INVALID_DTD_DECL. */
+SINGLE_CHAR_STATE(notationdecl_initial_state1, 'O', 0, notationdecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state2, 'T', 0, notationdecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state3, 'A', 0, notationdecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state4, 'T', 0, notationdecl_initial_state5, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state5, 'I', 0, notationdecl_initial_state6, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state6, 'O', 0, notationdecl_initial_state7, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(notationdecl_initial_state7, 'N', notationdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) // last letter: enters ws_plus_state with notationdecl_name_state1 in stored_state (presumably resumed after the whitespace - TODO confirm)
+/* State for the first character of a notation name: returns INVALID_CHAR_IN_NOTATIONDECL_NAME if it lacks the ncname_start_char flag, NO_ERROR otherwise. */
+FAXPP_Error
+notationdecl_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// Every character takes the default branch; LINE_ENDINGS is a macro defined elsewhere (presumably line-ending case labels - TODO confirm).
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = notationdecl_name_state2; // set before validation, so the state changes even when the error below is returned
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_NOTATIONDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/* State for the rest of a notation name: loops over characters until whitespace, then reports NOTATIONDECL_NAME_TOKEN; a character without the ncname_char flag returns INVALID_CHAR_IN_NOTATIONDECL_NAME. */
+FAXPP_Error
+notationdecl_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = notationdecl_content_state; // state recorded for after ws_state (presumably resumed once whitespace is skipped)
+ env->state = ws_state;
+ token_end_position(env); // called before next_char(), so the terminating whitespace is presumably excluded from the token
+ report_token(NOTATIONDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // checked after next_char(); env->state is left as this state on error
+ return INVALID_CHAR_IN_NOTATIONDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* State for the body of a notation declaration: handles one character per call; on '>' it calls base_state() and reports NOTATIONDECL_CONTENT_TOKEN. */
+FAXPP_Error
+notationdecl_content_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// NOTE(review): no token_end_position() call precedes report_token() below; the rev 38 diff later in this thread adds one.
+ switch(env->current_char) {
+ case '>':
+ base_state(env);
+ report_token(NOTATIONDECL_CONTENT_TOKEN, env);
+ break;
+ LINE_ENDINGS
+ default:
+ break;
+ }
+ next_char(env); // reached from every branch of the switch, including the closing '>'
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -111,6 +111,11 @@
case ATTLISTDECL_CONTENT_TOKEN:
return "ATTLISTDECL_CONTENT_TOKEN";
+ case NOTATIONDECL_NAME_TOKEN:
+ return "NOTATIONDECL_NAME_TOKEN";
+ case NOTATIONDECL_CONTENT_TOKEN:
+ return "NOTATIONDECL_CONTENT_TOKEN";
+
case NO_TOKEN:
break;
}
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -510,6 +510,29 @@
else if(state == attlistdecl_content_state)
return "attlistdecl_content_state";
+ else if(state == notationdecl_initial_state1)
+ return "notationdecl_initial_state1";
+ else if(state == notationdecl_initial_state1)
+ return "notationdecl_initial_state1";
+ else if(state == notationdecl_initial_state2)
+ return "notationdecl_initial_state2";
+ else if(state == notationdecl_initial_state3)
+ return "notationdecl_initial_state3";
+ else if(state == notationdecl_initial_state4)
+ return "notationdecl_initial_state4";
+ else if(state == notationdecl_initial_state5)
+ return "notationdecl_initial_state5";
+ else if(state == notationdecl_initial_state6)
+ return "notationdecl_initial_state6";
+ else if(state == notationdecl_initial_state7)
+ return "notationdecl_initial_state7";
+ else if(state == notationdecl_name_state1)
+ return "notationdecl_name_state1";
+ else if(state == notationdecl_name_state2)
+ return "notationdecl_name_state2";
+ else if(state == notationdecl_content_state)
+ return "notationdecl_content_state";
+
return "unknown";
}
#endif
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37)
@@ -303,7 +303,18 @@
FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error attlistdecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_initial_state7(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env);
+
/*********************
*
* Tokenizer Helper Functions
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2008-02-24 02:26:52 UTC (rev 36)
+++ trunk/faxpp/src/xml_parser.c 2008-02-25 12:05:13 UTC (rev 37)
@@ -826,6 +826,8 @@
case ATTLISTDECL_PREFIX_TOKEN:
case ATTLISTDECL_NAME_TOKEN:
case ATTLISTDECL_CONTENT_TOKEN:
+ case NOTATIONDECL_NAME_TOKEN:
+ case NOTATIONDECL_CONTENT_TOKEN:
// TBD - jpcs
break;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <jp...@us...> - 2008-02-27 09:44:20
|
Revision: 38
http://faxpp.svn.sourceforge.net/faxpp/?rev=38&view=rev
Author: jpcs
Date: 2008-02-27 01:44:24 -0800 (Wed, 27 Feb 2008)
Log Message:
-----------
Added code to tokenize entity declarations.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/attlistdecl.c
trunk/faxpp/src/doctype.c
trunk/faxpp/src/elementdecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/notationdecl.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
Added Paths:
-----------
trunk/faxpp/src/entitydecl.c
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38)
@@ -35,7 +35,8 @@
src/doctype.c \
src/elementdecl.c \
src/attlistdecl.c \
-src/notationdecl.c
+src/notationdecl.c \
+src/entitydecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38)
@@ -64,7 +64,7 @@
element.lo error.lo event.lo pi.lo reference.lo token.lo \
tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \
xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \
- notationdecl.lo
+ notationdecl.lo entitydecl.lo
libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS)
libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -256,7 +256,8 @@
src/doctype.c \
src/elementdecl.c \
src/attlistdecl.c \
-src/notationdecl.c
+src/notationdecl.c \
+src/entitydecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
@@ -386,6 +387,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@
@@ -555,6 +557,13 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c
+entitydecl.lo: src/entitydecl.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entitydecl.lo -MD -MP -MF $(DEPDIR)/entitydecl.Tpo -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entitydecl.Tpo $(DEPDIR)/entitydecl.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/entitydecl.c' object='entitydecl.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c
+
parser_example.o: examples/parser_example.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -55,6 +55,8 @@
INVALID_CHAR_IN_ELEMENTDECL_NAME,
INVALID_CHAR_IN_ATTLISTDECL_NAME,
INVALID_CHAR_IN_NOTATIONDECL_NAME,
+ INVALID_CHAR_IN_ENTITYDECL_NAME,
+ INVALID_ENTITYDECL,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/include/faxpp/token.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -71,6 +71,7 @@
SYSTEM_LITERAL_TOKEN,
PUBID_LITERAL_TOKEN,
+ NDATA_NAME_TOKEN,
ELEMENTDECL_PREFIX_TOKEN,
ELEMENTDECL_NAME_TOKEN,
@@ -83,6 +84,11 @@
NOTATIONDECL_NAME_TOKEN,
NOTATIONDECL_CONTENT_TOKEN,
+ ENTITYDECL_NAME_TOKEN,
+ ENTITYDECL_VALUE_TOKEN,
+ ENTITYDECL_END_TOKEN,
+ PARAMENTITYDECL_NAME_TOKEN,
+
END_OF_BUFFER_TOKEN = 99
} FAXPP_TokenType;
Modified: trunk/faxpp/src/attlistdecl.c
===================================================================
--- trunk/faxpp/src/attlistdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/attlistdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -163,6 +163,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(ATTLISTDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/doctype.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -398,6 +398,9 @@
case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
case '%':
@@ -439,6 +442,9 @@
case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
case '%': case '\'':
@@ -544,7 +550,7 @@
env->state = comment_start_state2;
break;
case 'E':
- env->state = elementdecl_initial_state1;
+ env->state = elementdecl_or_entitydecl_state;
break;
case 'A':
env->state = attlistdecl_initial_state1;
Modified: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/elementdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -17,6 +17,28 @@
#include "tokenizer_states.h"
#include "char_classes.h"
+FAXPP_Error /* State entered from doctype.c on 'E': the next character selects the ELEMENT ('L') or ENTITY ('N') keyword chain; anything else returns INVALID_DTD_DECL. */
+elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// next_char() is called in every branch, including the error branch.
+ switch(env->current_char) {
+ case 'L':
+ env->state = elementdecl_initial_state1; // continues by matching 'E','M','E','N','T'
+ next_char(env);
+ break;
+ case 'N':
+ env->state = entitydecl_initial_state1; // continues by matching 'T','I','T','Y'
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_DTD_DECL;
+ }
+ return NO_ERROR;
+}
+
#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
FAXPP_Error \
name(FAXPP_TokenizerEnv *env) \
@@ -37,12 +59,11 @@
return NO_ERROR; \
}
-SINGLE_CHAR_STATE(elementdecl_initial_state1, 'L', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state2, 'E', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state3, 'M', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state4, 'E', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state5, 'N', 0, elementdecl_initial_state6, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state6, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
FAXPP_Error
elementdecl_name_state1(FAXPP_TokenizerEnv *env)
@@ -149,6 +170,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(ELEMENTDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Added: trunk/faxpp/src/entitydecl.c
===================================================================
--- trunk/faxpp/src/entitydecl.c (rev 0)
+++ trunk/faxpp/src/entitydecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -0,0 +1,574 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tokenizer_states.h"
+#include "char_classes.h"
+/* Generates a state function `name` that accepts exactly one character, `ch`; for any other character it calls next_char() and returns `error`. */
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); /* 0 leaves stored_state unchanged */ \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS /* macro defined elsewhere; presumably line-ending case labels - TODO confirm */ \
+ default: \
+ next_char(env); \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+/* Chain of single-character states matching 'T','I','T','Y' in order; each mismatch returns INVALID_DTD_DECL. */
+SINGLE_CHAR_STATE(entitydecl_initial_state1, 'T', 0, entitydecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(entitydecl_initial_state2, 'I', 0, entitydecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(entitydecl_initial_state3, 'T', 0, entitydecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(entitydecl_initial_state4, 'Y', entitydecl_param_or_general_state, ws_plus_state, INVALID_DTD_DECL) // last letter: enters ws_plus_state with entitydecl_param_or_general_state in stored_state (presumably resumed after the whitespace - TODO confirm)
+/* Chooses between a parameter entity declaration ('%') and a general one (any other character, treated as the first character of the entity name). */
+FAXPP_Error
+entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// LINE_ENDINGS is a macro defined elsewhere; presumably line-ending case labels - TODO confirm.
+ switch(env->current_char) {
+ case '%':
+ env->stored_state = paramentitydecl_name_state1; // recorded for after ws_plus_state (presumably resumed once whitespace is consumed)
+ env->state = ws_plus_state;
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default:
+ env->state = entitydecl_name_state; // set before validation, so the state changes even when the error below is returned
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/* State for the rest of a general entity name: loops until whitespace, then reports ENTITYDECL_NAME_TOKEN; a character without the ncname_char flag returns INVALID_CHAR_IN_ENTITYDECL_NAME. */
+FAXPP_Error
+entitydecl_name_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = entitydecl_content_state; // state recorded for after ws_state (presumably resumed once whitespace is skipped)
+ env->state = ws_state;
+ token_end_position(env); // called before next_char(), so the terminating whitespace is presumably excluded from the token
+ report_token(ENTITYDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // checked after next_char(); env->state is left as this state on error
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* State after a general entity name: a quote starts a literal entity value, 'S' or 'P' starts an external ID; anything else returns INVALID_ENTITYDECL. */
+FAXPP_Error
+entitydecl_content_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// The quote branches return directly; the 'S'/'P' branches break out to the shared next_char() after the switch.
+ switch(env->current_char) {
+ case '"':
+ env->state = entitydecl_value_quot_state;
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the token presumably starts after the opening quote
+ return NO_ERROR;
+ case '\'':
+ env->state = entitydecl_value_apos_state;
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the token presumably starts after the opening quote
+ return NO_ERROR;
+ case 'S':
+ env->stored_state = entitydecl_ws_state; // recorded for after the system ID states (presumably resumed when they finish - TODO confirm)
+ env->state = system_id_initial_state1;
+ break;
+ case 'P':
+ env->stored_state = entitydecl_ws_state; // recorded for after the public ID states (presumably resumed when they finish - TODO confirm)
+ env->state = public_id_initial_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+/* Scans a general entity value delimited by apostrophes, reporting ENTITYDECL_VALUE_TOKEN at the closing quote, at '&', at '%', or when the buffer runs out. */
+FAXPP_Error
+entitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // buffer exhausted: report any non-empty partial value, otherwise ask for more input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) {
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+// Bounds were checked above, hence the _no_check variant.
+ read_char_no_check(env);
+// Every reporting branch below calls token_end_position() before next_char(), so the delimiter is presumably excluded from the token.
+ switch(env->current_char) {
+ case '\'':
+ env->state = entitydecl_end_state; // closing quote: value complete
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* Scans a general entity value delimited by double quotes, reporting ENTITYDECL_VALUE_TOKEN at the closing quote, at '&', at '%', or when the buffer runs out. */
+FAXPP_Error
+entitydecl_value_quot_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // buffer exhausted: report any non-empty partial value, otherwise ask for more input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) {
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+// Bounds were checked above, hence the _no_check variant.
+ read_char_no_check(env);
+// Every reporting branch below calls token_end_position() before next_char(), so the delimiter is presumably excluded from the token.
+ switch(env->current_char) {
+ case '"':
+ env->state = entitydecl_end_state; // closing quote: value complete
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* State that entitydecl_content_state records in stored_state before entering the external ID states: expects whitespace or '>'; anything else returns EXPECTING_WHITESPACE. */
+FAXPP_Error
+entitydecl_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = entitydecl_ndata_or_end_state;
+ next_char(env);
+ break;
+ case '>':
+ env->state = entitydecl_end_state; // no next_char() here: entitydecl_end_state handles the '>' itself
+ break;
+ default:
+ env->state = entitydecl_ndata_or_end_state; // state still advances on error, and next_char() is not called
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+/* State after the external ID and its whitespace: stays here on whitespace, hands '>' to entitydecl_end_state, starts the NDATA keyword chain on 'N', else returns INVALID_ENTITYDECL. */
+FAXPP_Error
+entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '>':
+ env->state = entitydecl_end_state; // no next_char() here: entitydecl_end_state handles the '>' itself
+ break;
+ case 'N':
+ env->state = entitydecl_ndata_state1; // continues by matching 'D','A','T','A'
+ next_char(env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+/* Chain of single-character states matching 'D','A','T','A' in order; each mismatch returns INVALID_ENTITYDECL. */
+SINGLE_CHAR_STATE(entitydecl_ndata_state1, 'D', 0, entitydecl_ndata_state2, INVALID_ENTITYDECL)
+SINGLE_CHAR_STATE(entitydecl_ndata_state2, 'A', 0, entitydecl_ndata_state3, INVALID_ENTITYDECL)
+SINGLE_CHAR_STATE(entitydecl_ndata_state3, 'T', 0, entitydecl_ndata_state4, INVALID_ENTITYDECL)
+SINGLE_CHAR_STATE(entitydecl_ndata_state4, 'A', entitydecl_ndata_name_state1, ws_plus_state, INVALID_ENTITYDECL) // last letter: enters ws_plus_state with entitydecl_ndata_name_state1 in stored_state (presumably resumed after the whitespace - TODO confirm)
+/* State for the first character of the NDATA notation name: returns INVALID_ENTITYDECL if it lacks the ncname_start_char flag, NO_ERROR otherwise. */
+FAXPP_Error
+entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// Every character takes the default branch; LINE_ENDINGS is a macro defined elsewhere (presumably line-ending case labels - TODO confirm).
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = entitydecl_ndata_name_state2; // set before validation, so the state changes even when the error below is returned
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_ENTITYDECL;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/* State for the rest of the NDATA notation name: loops until whitespace or '>', then reports NDATA_NAME_TOKEN; a character without the ncname_char flag returns INVALID_ENTITYDECL. */
+FAXPP_Error
+entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(NDATA_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '>':
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(NDATA_NAME_TOKEN, env);
+ return NO_ERROR; // unlike the whitespace branch, no next_char(): entitydecl_end_state handles the '>' itself
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // checked after next_char(); env->state is left as this state on error
+ return INVALID_ENTITYDECL;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* Final state of a general entity declaration: stays here on whitespace, and on '>' calls base_state() and reports an empty ENTITYDECL_END_TOKEN; anything else returns INVALID_ENTITYDECL. */
+FAXPP_Error
+entitydecl_end_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '>':
+ base_state(env);
+ report_empty_token(ENTITYDECL_END_TOKEN, env);
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the next token presumably starts after the '>'
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/* State for the first character of a parameter entity name: returns INVALID_CHAR_IN_ENTITYDECL_NAME if it lacks the ncname_start_char flag, NO_ERROR otherwise. */
+FAXPP_Error
+paramentitydecl_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// Every character takes the default branch; LINE_ENDINGS is a macro defined elsewhere (presumably line-ending case labels - TODO confirm).
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = paramentitydecl_name_state2; // set before validation, so the state changes even when the error below is returned
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+/* State for the rest of a parameter entity name: loops until whitespace, then reports PARAMENTITYDECL_NAME_TOKEN; a character without the ncname_char flag returns INVALID_CHAR_IN_ENTITYDECL_NAME. */
+FAXPP_Error
+paramentitydecl_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = paramentitydecl_content_state; // state recorded for after ws_state (presumably resumed once whitespace is skipped)
+ env->state = ws_state;
+ token_end_position(env); // called before next_char(), so the terminating whitespace is presumably excluded from the token
+ report_token(PARAMENTITYDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // checked after next_char(); env->state is left as this state on error
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* State after a parameter entity name: a quote starts a literal entity value, 'S' or 'P' starts an external ID; anything else returns INVALID_ENTITYDECL. */
+FAXPP_Error
+paramentitydecl_content_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// The quote branches return directly; the 'S'/'P' branches break out to the shared next_char() after the switch.
+ switch(env->current_char) {
+ case '"':
+ env->state = paramentitydecl_value_quot_state;
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the token presumably starts after the opening quote
+ return NO_ERROR;
+ case '\'':
+ env->state = paramentitydecl_value_apos_state;
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the token presumably starts after the opening quote
+ return NO_ERROR;
+ case 'S':
+ env->stored_state = paramentitydecl_end_state; // goes straight to the end state afterwards; there is no NDATA branch on this path
+ env->state = system_id_initial_state1;
+ break;
+ case 'P':
+ env->stored_state = paramentitydecl_end_state; // goes straight to the end state afterwards; there is no NDATA branch on this path
+ env->state = public_id_initial_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+/* Scans a parameter entity value delimited by apostrophes, reporting ENTITYDECL_VALUE_TOKEN at the closing quote, at '&', at '%', or when the buffer runs out. */
+FAXPP_Error
+paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // buffer exhausted: report any non-empty partial value, otherwise ask for more input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) {
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+// Bounds were checked above, hence the _no_check variant.
+ read_char_no_check(env);
+// Every reporting branch below calls token_end_position() before next_char(), so the delimiter is presumably excluded from the token.
+ switch(env->current_char) {
+ case '\'':
+ env->state = paramentitydecl_end_state; // closing quote: value complete
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* Scans a parameter entity value delimited by double quotes, reporting ENTITYDECL_VALUE_TOKEN at the closing quote, at '&', at '%', or when the buffer runs out. */
+FAXPP_Error
+paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // buffer exhausted: report any non-empty partial value, otherwise ask for more input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) {
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+// Bounds were checked above, hence the _no_check variant.
+ read_char_no_check(env);
+// Every reporting branch below calls token_end_position() before next_char(), so the delimiter is presumably excluded from the token.
+ switch(env->current_char) {
+ case '"':
+ env->state = paramentitydecl_end_state; // closing quote: value complete
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%':
+ store_state(env); // presumably saves this state so scanning resumes here after the reference - TODO confirm
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+/* Final state of a parameter entity declaration: stays here on whitespace, and on '>' calls base_state() and reports an empty ENTITYDECL_END_TOKEN; anything else returns INVALID_ENTITYDECL. */
+FAXPP_Error
+paramentitydecl_end_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+// WHITESPACE is a macro defined elsewhere; presumably it expands to the whitespace case labels - TODO confirm.
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '>':
+ base_state(env);
+ report_empty_token(ENTITYDECL_END_TOKEN, env); // same token type as the general entity end state uses
+ next_char(env);
+ token_start_position(env); // called after next_char(), so the next token presumably starts after the '>'
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/error.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -105,6 +105,10 @@
return "INVALID_CHAR_IN_ATTLISTDECL_NAME";
case INVALID_CHAR_IN_NOTATIONDECL_NAME:
return "INVALID_CHAR_IN_NOTATIONDECL_NAME";
+ case INVALID_CHAR_IN_ENTITYDECL_NAME:
+ return "INVALID_CHAR_IN_ENTITYDECL_NAME";
+ case INVALID_ENTITYDECL:
+ return "INVALID_ENTITYDECL";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/notationdecl.c
===================================================================
--- trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/notationdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -98,6 +98,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(NOTATIONDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/token.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -96,6 +96,8 @@
return "SYSTEM_LITERAL_TOKEN";
case PUBID_LITERAL_TOKEN:
return "PUBID_LITERAL_TOKEN";
+ case NDATA_NAME_TOKEN:
+ return "NDATA_NAME_TOKEN";
case ELEMENTDECL_PREFIX_TOKEN:
return "ELEMENTDECL_PREFIX_TOKEN";
@@ -116,6 +118,15 @@
case NOTATIONDECL_CONTENT_TOKEN:
return "NOTATIONDECL_CONTENT_TOKEN";
+ case ENTITYDECL_NAME_TOKEN:
+ return "ENTITYDECL_NAME_TOKEN";
+ case ENTITYDECL_VALUE_TOKEN:
+ return "ENTITYDECL_VALUE_TOKEN";
+ case ENTITYDECL_END_TOKEN:
+ return "ENTITYDECL_END_TOKEN";
+ case PARAMENTITYDECL_NAME_TOKEN:
+ return "PARAMENTITYDECL_NAME_TOKEN";
+
case NO_TOKEN:
break;
}
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/tokenizer_states.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -460,10 +460,10 @@
else if(state == pubid_literal_quot_state)
return "pubid_literal_quot_state";
+ else if(state == elementdecl_or_entitydecl_state)
+ return "elementdecl_or_entitydecl_state";
else if(state == elementdecl_initial_state1)
return "elementdecl_initial_state1";
- else if(state == elementdecl_initial_state1)
- return "elementdecl_initial_state1";
else if(state == elementdecl_initial_state2)
return "elementdecl_initial_state2";
else if(state == elementdecl_initial_state3)
@@ -472,8 +472,6 @@
return "elementdecl_initial_state4";
else if(state == elementdecl_initial_state5)
return "elementdecl_initial_state5";
- else if(state == elementdecl_initial_state6)
- return "elementdecl_initial_state6";
else if(state == elementdecl_name_state1)
return "elementdecl_name_state1";
else if(state == elementdecl_name_state2)
@@ -533,6 +531,59 @@
else if(state == notationdecl_content_state)
return "notationdecl_content_state";
+ else if(state == entitydecl_initial_state1)
+ return "entitydecl_initial_state1";
+ else if(state == entitydecl_initial_state1)
+ return "entitydecl_initial_state1";
+ else if(state == entitydecl_initial_state2)
+ return "entitydecl_initial_state2";
+ else if(state == entitydecl_initial_state3)
+ return "entitydecl_initial_state3";
+ else if(state == entitydecl_initial_state4)
+ return "entitydecl_initial_state4";
+ else if(state == entitydecl_param_or_general_state)
+ return "entitydecl_param_or_general_state";
+
+ else if(state == entitydecl_name_state)
+ return "entitydecl_name_state";
+ else if(state == entitydecl_content_state)
+ return "entitydecl_content_state";
+ else if(state == entitydecl_value_apos_state)
+ return "entitydecl_value_apos_state";
+ else if(state == entitydecl_value_quot_state)
+ return "entitydecl_value_quot_state";
+ else if(state == entitydecl_ws_state)
+ return "entitydecl_ws_state";
+ else if(state == entitydecl_ndata_or_end_state)
+ return "entitydecl_ndata_or_end_state";
+ else if(state == entitydecl_ndata_state1)
+ return "entitydecl_ndata_state1";
+ else if(state == entitydecl_ndata_state2)
+ return "entitydecl_ndata_state2";
+ else if(state == entitydecl_ndata_state3)
+ return "entitydecl_ndata_state3";
+ else if(state == entitydecl_ndata_state4)
+ return "entitydecl_ndata_state4";
+ else if(state == entitydecl_ndata_name_state1)
+ return "entitydecl_ndata_name_state1";
+ else if(state == entitydecl_ndata_name_state2)
+ return "entitydecl_ndata_name_state2";
+ else if(state == entitydecl_end_state)
+ return "entitydecl_end_state";
+
+ else if(state == paramentitydecl_name_state1)
+ return "paramentitydecl_name_state1";
+ else if(state == paramentitydecl_name_state2)
+ return "paramentitydecl_name_state2";
+ else if(state == paramentitydecl_content_state)
+ return "paramentitydecl_content_state";
+ else if(state == paramentitydecl_value_apos_state)
+ return "paramentitydecl_value_apos_state";
+ else if(state == paramentitydecl_value_quot_state)
+ return "paramentitydecl_value_quot_state";
+ else if(state == paramentitydecl_end_state)
+ return "paramentitydecl_end_state";
+
return "unknown";
}
#endif
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/tokenizer_states.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -279,12 +279,12 @@
FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv ...
[truncated message content] |
|
From: <jp...@us...> - 2008-03-03 14:32:33
|
Revision: 39
http://faxpp.svn.sourceforge.net/faxpp/?rev=39&view=rev
Author: jpcs
Date: 2008-03-03 06:32:32 -0800 (Mon, 03 Mar 2008)
Log Message:
-----------
Implemented entity replacement, using a stack of tokenizers to parse the
different entity values.
Implemented external entity resolution and parsing using API methods or
a callback function.
Fixed a number of bugs.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/configure
trunk/faxpp/configure.in
trunk/faxpp/docs/Doxyfile.api
trunk/faxpp/examples/parser_example.c
trunk/faxpp/examples/tokenizer_example.c
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/event.h
trunk/faxpp/include/faxpp/parser.h
trunk/faxpp/include/faxpp/tokenizer.h
trunk/faxpp/include/faxpp/transcode.h
trunk/faxpp/src/attr_states.h
trunk/faxpp/src/buffer.c
trunk/faxpp/src/buffer.h
trunk/faxpp/src/char_classes.c
trunk/faxpp/src/char_classes.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/element_states.h
trunk/faxpp/src/error.c
trunk/faxpp/src/event.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/transcode.c
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
trunk/faxpp/src/xml_tokenizer.c
trunk/faxpp/src/xml_tokenizer.h
trunk/faxpp/src/xmldecl.c
trunk/faxpp/tests/xmlconf_runner.c
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/Makefile.am 2008-03-03 14:32:32 UTC (rev 39)
@@ -7,7 +7,7 @@
lib_LTLIBRARIES = libfaxpp.la
# Library version, see http://sourceware.org/autobook/autobook/autobook_91.html
-libfaxpp_la_LDFLAGS = -version-info 1:0:0 -no-undefined
+libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined
libfaxpp_la_SOURCES = \
src/attr_states.h \
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/Makefile.in 2008-03-03 14:32:32 UTC (rev 39)
@@ -229,7 +229,7 @@
lib_LTLIBRARIES = libfaxpp.la
# Library version, see http://sourceware.org/autobook/autobook/autobook_91.html
-libfaxpp_la_LDFLAGS = -version-info 1:0:0 -no-undefined
+libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined
libfaxpp_la_SOURCES = \
src/attr_states.h \
src/buffer.c \
Modified: trunk/faxpp/configure
===================================================================
--- trunk/faxpp/configure 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/configure 2008-03-03 14:32:32 UTC (rev 39)
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for faxpp 0.2.
+# Generated by GNU Autoconf 2.61 for faxpp 0.3.
#
# Report bugs to <jo...@sn...>.
#
@@ -728,8 +728,8 @@
# Identity of this package.
PACKAGE_NAME='faxpp'
PACKAGE_TARNAME='faxpp'
-PACKAGE_VERSION='0.2'
-PACKAGE_STRING='faxpp 0.2'
+PACKAGE_VERSION='0.3'
+PACKAGE_STRING='faxpp 0.3'
PACKAGE_BUGREPORT='jo...@sn...'
ac_unique_file="include/faxpp/parser.h"
@@ -1390,7 +1390,7 @@
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures faxpp 0.2 to adapt to many kinds of systems.
+\`configure' configures faxpp 0.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1460,7 +1460,7 @@
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of faxpp 0.2:";;
+ short | recursive ) echo "Configuration of faxpp 0.3:";;
esac
cat <<\_ACEOF
@@ -1564,7 +1564,7 @@
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-faxpp configure 0.2
+faxpp configure 0.3
generated by GNU Autoconf 2.61
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1578,7 +1578,7 @@
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by faxpp $as_me 0.2, which was
+It was created by faxpp $as_me 0.3, which was
generated by GNU Autoconf 2.61. Invocation command line was
$ $0 $@
@@ -2268,7 +2268,7 @@
# Define the identity of the package.
PACKAGE=faxpp
- VERSION=0.2
+ VERSION=0.3
# Some tools Automake needs.
@@ -21442,7 +21442,7 @@
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by faxpp $as_me 0.2, which was
+This file was extended by faxpp $as_me 0.3, which was
generated by GNU Autoconf 2.61. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -21495,7 +21495,7 @@
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-faxpp config.status 0.2
+faxpp config.status 0.3
configured by $0, generated by GNU Autoconf 2.61,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
Modified: trunk/faxpp/configure.in
===================================================================
--- trunk/faxpp/configure.in 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/configure.in 2008-03-03 14:32:32 UTC (rev 39)
@@ -2,9 +2,9 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT(faxpp, 0.2, jo...@sn...)
+AC_INIT(faxpp, 0.3, jo...@sn...)
AC_CONFIG_AUX_DIR(autotools)
-AM_INIT_AUTOMAKE(faxpp, 0.2, jo...@sn...)
+AM_INIT_AUTOMAKE(faxpp, 0.3, jo...@sn...)
AC_CONFIG_SRCDIR([include/faxpp/parser.h])
AM_CONFIG_HEADER([src/config.h])
Modified: trunk/faxpp/docs/Doxyfile.api
===================================================================
--- trunk/faxpp/docs/Doxyfile.api 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/docs/Doxyfile.api 2008-03-03 14:32:32 UTC (rev 39)
@@ -23,7 +23,7 @@
# This could be handy for archiving the generated documentation or
# if some version control system is used.
-PROJECT_NUMBER = 0.2
+PROJECT_NUMBER = 0.3
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
Modified: trunk/faxpp/examples/parser_example.c
===================================================================
--- trunk/faxpp/examples/parser_example.c 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/examples/parser_example.c 2008-03-03 14:32:32 UTC (rev 39)
@@ -44,7 +44,31 @@
}
}
+/**
+ * Writes a piece of attribute value text to a stream, escaping the characters
+ * that can not appear literally inside a double quoted XML attribute value.
+ *
+ * The text is walked one byte at a time, text->len bytes starting at text->ptr.
+ * NOTE(review): this presumably relies on the parser producing UTF-8 output, as
+ * this example requests - confirm before reusing with a UTF-16 transcoder.
+ *
+ * \param text The text to output
+ * \param stream The stream to write the escaped text to
+ */
+void
+output_escaped_attr_text(const FAXPP_Text *text, FILE *stream)
+{
+  char *buffer = (char*)text->ptr;
+  char *buffer_end = buffer + text->len;
+
+  while(buffer < buffer_end) {
+    if(*buffer == '&') {
+      /* A bare '&' would start a reference */
+      fprintf(stream, "&amp;");
+    }
+    else if(*buffer == '<') {
+      /* '<' is never allowed in an attribute value */
+      fprintf(stream, "&lt;");
+    }
+    else if(*buffer == '"') {
+      /* The caller delimits the attribute value with double quotes */
+      fprintf(stream, "&quot;");
+    }
+    else {
+      putc(*buffer, stream);
+    }
+    ++buffer;
+  }
+}
+
#define SHOW_URIS 0
+#define SHOW_ENTITIES 0
void
output_event(const FAXPP_Event *event, FILE *stream)
@@ -70,6 +94,31 @@
break;
case END_DOCUMENT_EVENT:
break;
+ case DOCTYPE_EVENT:
+ fprintf(stream, "<!DOCTYPE ");
+
+ if(event->prefix.ptr != 0) {
+ output_text(&event->prefix, stream);
+ fprintf(stream, ":");
+ }
+ output_text(&event->name, stream);
+
+ if(event->system.ptr != 0) {
+ if(event->public.ptr != 0) {
+ fprintf(stream, " PUBLIC \"");
+ output_text(&event->public, stream);
+ fprintf(stream, "\" \"");
+ output_text(&event->system, stream);
+ fprintf(stream, "\"");
+ }
+ else {
+ fprintf(stream, " SYSTEM \"");
+ output_text(&event->system, stream);
+ fprintf(stream, "\"");
+ }
+ }
+ fprintf(stream, ">");
+ break;
case START_ELEMENT_EVENT:
case SELF_CLOSING_ELEMENT_EVENT:
fprintf(stream, "<");
@@ -106,7 +155,7 @@
while(atval) {
switch(atval->type) {
case CHARACTERS_EVENT:
- output_text(&atval->value, stream);
+ output_escaped_attr_text(&atval->value, stream);
break;
case ENTITY_REFERENCE_EVENT:
fprintf(stream, "&");
@@ -123,6 +172,18 @@
output_text(&atval->name, stream);
fprintf(stream, ";");
break;
+ case ENTITY_REFERENCE_START_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, "&");
+ output_text(&atval->name, stream);
+ fprintf(stream, ";(");
+#endif
+ break;
+ case ENTITY_REFERENCE_END_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, ")");
+#endif
+ break;
default:
break;
}
@@ -193,11 +254,73 @@
output_text(&event->name, stream);
fprintf(stream, ";");
break;
+ case ENTITY_REFERENCE_START_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, "&");
+ output_text(&event->name, stream);
+ fprintf(stream, ";(");
+#endif
+ break;
+ case ENTITY_REFERENCE_END_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, ")");
+#endif
+ break;
+ case START_EXTERNAL_ENTITY_EVENT:
+ case END_EXTERNAL_ENTITY_EVENT:
case NO_EVENT:
break;
}
}
+/**
+ * Resolves a path against the directory part of a base path.
+ *
+ * Everything in base after its last '/' is dropped (the whole of base if it
+ * contains no '/'), and path is appended in its place. No other normalization
+ * is performed, so an absolute path is simply appended as well.
+ *
+ * \param base The null terminated base path
+ * \param path The path to resolve - it does not need to be null terminated
+ * \param path_len The length of path
+ *
+ * \return A newly allocated, null terminated string that the caller must
+ * free(), or 0 if out of memory.
+ */
+char *resolve_paths(const char *base, const char *path, unsigned int path_len)
+{
+  size_t base_len = strlen(base);
+  size_t dir_len = base_len;
+
+  /* The directory part is never longer than base, so this is always enough */
+  char *result = malloc(base_len + path_len + 1);
+  if(result == 0) return 0;
+
+  /* Find the length of base up to and including its last '/'. An index is
+     used so that no pointer before the start of a buffer is ever formed. */
+  while(dir_len > 0 && base[dir_len - 1] != '/') {
+    --dir_len;
+  }
+  memcpy(result, base, dir_len);
+
+  /* strncpy, since path is length delimited rather than null terminated */
+  strncpy(result + dir_len, path, path_len);
+  result[dir_len + path_len] = 0;
+
+  return result;
+}
+
+/**
+ * Read callback that fills the buffer from the FILE* passed as the user data.
+ *
+ * The file is closed by this function as soon as fread() returns fewer bytes
+ * than were asked for, so the caller does not close it itself.
+ *
+ * \param userData The FILE* to read from
+ * \param buffer The buffer to read into
+ * \param length The maximum number of bytes to read
+ *
+ * \return The number of bytes read into the buffer
+ */
+static unsigned int file_read_callback(void *userData, void *buffer, unsigned int length)
+{
+  FILE *input = (FILE*)userData;
+  unsigned int bytes_read = fread(buffer, 1, length, input);
+
+  if(bytes_read >= length) {
+    return bytes_read;
+  }
+
+  /* Short read - nothing more will be read from this file */
+  fclose(input);
+  return bytes_read;
+}
+
+/**
+ * External entity callback that resolves the entity's system identifier
+ * against a base path, opens the resulting file and asks the parser to
+ * parse it.
+ *
+ * \param userData The null terminated base path to resolve against (main()
+ * registers the path of the document being parsed)
+ * \param parser The parser that found the external entity reference
+ * \param system The entity's system identifier
+ * \param public The entity's public identifier (not used)
+ *
+ * \return OUT_OF_MEMORY if the path could not be allocated,
+ * CANT_LOCATE_EXTERNAL_ENTITY if the file could not be opened, otherwise the
+ * result of FAXPP_parse_external_entity_callback()
+ */
+static FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser,
+                                   const FAXPP_Text *system, const FAXPP_Text *public)
+{
+  FILE *file;
+  char *path;
+
+  path = resolve_paths((char*)userData, (char*)system->ptr, system->len);
+  if(path == 0) {
+    return OUT_OF_MEMORY;
+  }
+
+  file = fopen(path, "r");
+  if(file == 0) {
+    printf("Open of '%s' failed: %s\n", path, strerror(errno));
+    /* The path must be released on the failure path too */
+    free(path);
+    return CANT_LOCATE_EXTERNAL_ENTITY;
+  }
+  free(path);
+
+  /* file_read_callback closes the file once it has been read to the end */
+  return FAXPP_parse_external_entity_callback(parser, file_read_callback, file);
+}
+
int
main(int argc, char **argv)
{
@@ -211,7 +334,7 @@
exit(-1);
}
- FAXPP_Parser *parser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, FAXPP_utf8_encode);
+ FAXPP_Parser *parser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, FAXPP_utf8_transcoder);
if(parser == 0) {
printf("ERROR: out of memory\n");
exit(1);
@@ -219,15 +342,17 @@
for(i = 1; i < argc; ++i) {
+ FAXPP_set_external_entity_callback(parser, entity_callback, argv[i]);
+
startTime = getTime();
file = fopen(argv[i], "r");
if(file == 0) {
- printf("Open failed: %s\n", strerror(errno));
+ printf("Open of '%s' failed: %s\n", argv[i], strerror(errno));
exit(1);
}
- err = FAXPP_init_parse_file(parser, file);
+ err = FAXPP_init_parse_callback(parser, file_read_callback, file);
if(err != NO_ERROR) {
printf("ERROR: %s\n", FAXPP_err_to_string(err));
exit(1);
@@ -245,8 +370,6 @@
FAXPP_get_error_column(parser), FAXPP_err_to_string(err));
}
- fclose(file);
-
printf("Time taken: %gms\n", ((double)(getTime() - startTime) / MSECS_IN_SECS * 1000));
}
Modified: trunk/faxpp/examples/tokenizer_example.c
===================================================================
--- trunk/faxpp/examples/tokenizer_example.c 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/examples/tokenizer_example.c 2008-03-03 14:32:32 UTC (rev 39)
@@ -52,7 +52,7 @@
exit(-1);
}
- FAXPP_Tokenizer *tokenizer = FAXPP_create_tokenizer(FAXPP_utf8_encode);
+ FAXPP_Tokenizer *tokenizer = FAXPP_create_tokenizer(FAXPP_utf8_transcoder);
if(tokenizer == 0) {
printf("ERROR: out of memory\n");
exit(1);
@@ -94,7 +94,7 @@
}
else length = 0;
- length += fread(xml, 1, sizeof(xml) - length, file);
+ length += fread(xml + length, 1, sizeof(xml) - length, file);
err = FAXPP_continue_tokenize(tokenizer, xml, length, length != sizeof(xml));
if(err != NO_ERROR) {
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -57,6 +57,14 @@
INVALID_CHAR_IN_NOTATIONDECL_NAME,
INVALID_CHAR_IN_ENTITYDECL_NAME,
INVALID_ENTITYDECL,
+ REFERENCE_TO_UNPARSED_ENTITY,
+ RECURSIVE_ENTITY,
+ INCOMPLETE_MARKUP_IN_ENTITY_VALUE,
+ REFERENCE_TO_EXTERNAL_ENTITY,
+ ILLEGAL_CHARACTER_REFERENCE,
+ ILLEGAL_PARAMETER_ENTITY,
+ CANT_LOCATE_EXTERNAL_ENTITY,
+ DONT_PARSE_EXTERNAL_ENTITY,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/event.h
===================================================================
--- trunk/faxpp/include/faxpp/event.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/include/faxpp/event.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -24,6 +24,7 @@
NO_EVENT,
START_DOCUMENT_EVENT,
END_DOCUMENT_EVENT,
+ DOCTYPE_EVENT,
START_ELEMENT_EVENT,
SELF_CLOSING_ELEMENT_EVENT,
END_ELEMENT_EVENT,
@@ -32,17 +33,26 @@
IGNORABLE_WHITESPACE_EVENT,
COMMENT_EVENT,
PI_EVENT,
- ENTITY_REFERENCE_EVENT,
+ ENTITY_REFERENCE_EVENT, ///< Reference to an external parsed entity that has not been parsed yet, or a built in entity
DEC_CHAR_REFERENCE_EVENT,
- HEX_CHAR_REFERENCE_EVENT
+ HEX_CHAR_REFERENCE_EVENT,
+ ENTITY_REFERENCE_START_EVENT, ///< Delimits the start of an expanded entity reference
+ ENTITY_REFERENCE_END_EVENT, ///< Delimits the end of an expanded entity reference
+ START_EXTERNAL_ENTITY_EVENT, ///< Occurs when an external entity parsed by the user starts
+ END_EXTERNAL_ENTITY_EVENT ///< Occurs when an external entity parsed by the user ends
} FAXPP_EventType;
/// An item in a linked list of a FAXPP_Attribute object's value
typedef struct FAXPP_AttrValue_s {
- FAXPP_EventType type; ///< The type of the attribute value. Can be CHARACTERS_EVENT, ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT
- /// The name of the item in the attribute's value. \details Present for event types ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT
+ /// The type of the attribute value. \details Can be CHARACTERS_EVENT, ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, HEX_CHAR_REFERENCE_EVENT,
+ /// ENTITY_REFERENCE_START_EVENT and ENTITY_REFERENCE_END_EVENT
+ FAXPP_EventType type;
+ /// The name of the item in the attribute's value. \details Present for event types ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT,
+ /// HEX_CHAR_REFERENCE_EVENT and ENTITY_REFERENCE_START_EVENT
FAXPP_Text name;
- FAXPP_Text value; ///< The value of the item in the attribute's value. \details Present for all possible event types
+ /// The value of the item in the attribute's value. \details Present for all possible event types except ENTITY_REFERENCE_START_EVENT and
+ /// ENTITY_REFERENCE_END_EVENT
+ FAXPP_Text value;
unsigned int line; ///< The line the attribute value started on
unsigned int column; ///< The column the attribute value started on
@@ -72,10 +82,10 @@
typedef struct {
FAXPP_EventType type; ///< The type of the event
- FAXPP_Text prefix; ///< The prefix of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT
+ FAXPP_Text prefix; ///< The prefix of the event. \details Present for event types DOCTYPE_EVENT, START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT
FAXPP_Text uri; ///< The URI of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT
- /// The name of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, END_ELEMENT_EVENT, PI_EVENT, ENTITY_REFERENCE_EVENT,
- /// DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT
+ /// The name of the event. \details Present for event types DOCTYPE_EVENT, START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, END_ELEMENT_EVENT, PI_EVENT,
+ /// ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT
FAXPP_Text name;
/// The value of the event. \details Present for event types CHARACTERS_EVENT, CDATA_EVENT, IGNORABLE_WHITESPACE_EVENT, COMMENT_EVENT, PI_EVENT,
@@ -85,10 +95,13 @@
unsigned int attr_count; ///< The number of attributes in the event. \details Present for event types START_ELEMENT_EVENT, and SELF_CLOSING_ELEMENT_EVENT
FAXPP_Attribute *attrs; ///< Array of attributes. \details Present for event types START_ELEMENT_EVENT, and SELF_CLOSING_ELEMENT_EVENT
- FAXPP_Text version; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type
- FAXPP_Text encoding; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type
+ FAXPP_Text version; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT
+ FAXPP_Text encoding; ///< The encoding of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT
FAXPP_Text standalone; ///< The standalone declaration of the event. \details Present only for the START_DOCUMENT_EVENT event type
+ FAXPP_Text system; ///< The system literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT
+ FAXPP_Text public; ///< The public ID literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT
+
unsigned int line; ///< The line number of the start of the event
unsigned int column; ///< The column number of the start of the event
} FAXPP_Event;
Modified: trunk/faxpp/include/faxpp/parser.h
===================================================================
--- trunk/faxpp/include/faxpp/parser.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/include/faxpp/parser.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -26,7 +26,7 @@
/**
* \mainpage
*
- * Faxpp is a small, fast XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. It currently has no DTD support, although it is planned.
+ * Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings.
*
* Faxpp is written by John Snelson, and is released under the terms of the Apache Licence v2.
*
@@ -46,13 +46,14 @@
* \li The output string encoding is the same as the XML document's encoding.
* \li The event / token does not cross a buffer boundary when streaming input.
* \li The parser is not set to null terminate it's strings.
+ * \li Attribute values do not need to be normalized.
*
* Therefore, to maximize the performance from faxpp the following steps can be taken:
*
* \li Choose not to null terminate output strings.
* \li Choose an output string encoding that is the same as most of the input XML documents that will be parsed.
- * \li Stream XML documents using a large a buffer as possible.
- * \li Choose to always recieve the output strings in the same encoding as the document, by setting FAXPP_set_encode() to 0.
+ * \li Stream XML documents using as large a buffer as possible.
+ * \li Turn off attribute value normalization by setting FAXPP_set_normalize_attrs() to 0 (this makes the parser non-conformant).
*
* \section Downloads
*
@@ -103,16 +104,33 @@
typedef unsigned int (*FAXPP_ReadCallback)(void *userData, void *buffer, unsigned int length);
/**
+ * The function called when faxpp finds a reference to an external parsed entity. The function should
+ * locate the entity using its system and public identifiers and call FAXPP_parse_external_entity(),
+ * FAXPP_parse_external_entity_callback() or FAXPP_parse_external_entity_file() to parse the external
+ * entity.
+ *
+ * \param userData The user data supplied to the FAXPP_set_external_entity_callback() method
+ * \param parser A pointer to the parser
+ * \param system The entity's system identifier
+ * \param public The entity's public identifier
+ *
+ * \return NO_ERROR on success, DONT_PARSE_EXTERNAL_ENTITY to return an unexpanded ENTITY_REFERENCE_EVENT
+ * event, otherwise another error code to halt parsing (most probably CANT_LOCATE_EXTERNAL_ENTITY).
+ */
+typedef FAXPP_Error (*FAXPP_ExternalEntityCallback)(void *userData, FAXPP_Parser *parser,
+ const FAXPP_Text *system, const FAXPP_Text *public);
+
+/**
* Creates a parser object
*
* \param mode The type of checks the parser should perform
- * \param encode The encoding function to use when encoding event values
+ * \param encode The transcoder to use when encoding event values
*
* \return A pointer to the parser object, or 0 if out of memory.
*
* \relatesalso FAXPP_Parser
*/
-FAXPP_Parser *FAXPP_create_parser(FAXPP_ParseMode mode, FAXPP_EncodeFunction encode);
+FAXPP_Parser *FAXPP_create_parser(FAXPP_ParseMode mode, FAXPP_Transcoder encode);
/**
* Frees a parser object
@@ -164,16 +182,16 @@
void FAXPP_set_normalize_attrs(FAXPP_Parser *parser, unsigned int boolean);
/**
- * Sets the encoding that the parser will use when encoding event values.
+ * Sets the transcoder that the parser will use when encoding event values.
*
* Setting this parameter whilst a parse is in progress has undefined results.
*
* \param parser
- * \param encode The encoding function to use when encoding event values
+ * \param encode The transcoder to use when encoding event values
*
* \relatesalso FAXPP_Parser
*/
-void FAXPP_set_encode(FAXPP_Parser *parser, FAXPP_EncodeFunction encode);
+void FAXPP_set_encode(FAXPP_Parser *parser, FAXPP_Transcoder encode);
/**
* Returns the current FAXPP_DecodeFunction that the parser is using.
@@ -203,6 +221,18 @@
void FAXPP_set_decode(FAXPP_Parser *parser, FAXPP_DecodeFunction decode);
/**
+ * Sets the FAXPP_ExternalEntityCallback that the parser will call when it
+ * encounters a reference to an external parsed entity.
+ *
+ * \param parser
+ * \param callback The callback function
+ * \param userData The user data passed when the function is called
+ *
+ * \relatesalso FAXPP_Parser
+ */
+void FAXPP_set_external_entity_callback(FAXPP_Parser *parser, FAXPP_ExternalEntityCallback callback, void *userData);
+
+/**
* Initialize the parser to parse the given buffer. This will halt any
* parse that was already in progress.
*
@@ -229,7 +259,7 @@
* parse that was already in progress.
*
* The file provided must remain valid during the time that the parser is using it.
- * The user remains responsible for closing the file after parsing has ended..
+ * The user remains responsible for closing the file after parsing has ended.
*
* \param parser The parser to initialize
* \param file The file descriptor of the file to parse
@@ -261,6 +291,73 @@
FAXPP_Error FAXPP_init_parse_callback(FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData);
/**
+ * Interrupts parsing to parse the external entity in the given buffer. Any parsing
+ * that was previously underway will continue when the external entity has been parsed.
+ * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a
+ * non-null public or system identifier, in order to parse the external entity it points
+ * to.
+ *
+ * The buffer provided must remain valid and unchanged during the time that
+ * the parser is using it, since a copy of it is \e not made. The user remains
+ * responsible for deleting the buffer.
+ *
+ * \param parser The parser to use
+ * \param buffer A pointer to the start of the buffer to parse
+ * \param length The length of the given buffer
+ * \param done Set to non-zero if this is the last buffer from the external entity
+ *
+ * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize
+ * the encoding of the buffer
+ * \retval OUT_OF_MEMORY
+ * \retval NO_ERROR
+ *
+ * \relatesalso FAXPP_Parser
+ */
+FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *parser, void *buffer, unsigned int length, unsigned int done);
+
+/**
+ * Interrupts parsing to parse the external entity from the given file. Any parsing
+ * that was previously underway will continue when the external entity has been parsed.
+ * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a
+ * non-null public or system identifier, in order to parse the external entity it points
+ * to.
+ *
+ * The file provided must remain valid during the time that the parser is using it.
+ * The user remains responsible for closing the file after parsing has ended.
+ *
+ * \param parser The parser to initialize
+ * \param file The file descriptor of the file to parse
+ *
+ * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize
+ * the encoding of the buffer
+ * \retval OUT_OF_MEMORY
+ * \retval NO_ERROR
+ *
+ * \relatesalso FAXPP_Parser
+ */
+FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *parser, FILE *file);
+
+/**
+ * Interrupts parsing to parse the external entity using the given read callback. Any parsing
+ * that was previously underway will continue when the external entity has been parsed.
+ * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a
+ * non-null public or system identifier, in order to parse the external entity it points
+ * to.
+ *
+ * \param parser The parser to initialize
+ * \param callback The read callback function to use to retrieve the parse input
+ * \param userData The user data to be passed to the callback function when it is called
+ *
+ * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize
+ * the encoding of the buffer
+ * \retval OUT_OF_MEMORY
+ * \retval NO_ERROR
+ *
+ * \relatesalso FAXPP_Parser
+ */
+FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData);
+
+/**
* Instructs the parser to release any dependencies it has on it's current buffer.
*
* This is typically called on recieving a PREMATURE_END_OF_BUFFER error, before
Modified: trunk/faxpp/include/faxpp/tokenizer.h
===================================================================
--- trunk/faxpp/include/faxpp/tokenizer.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/include/faxpp/tokenizer.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -34,12 +34,12 @@
/**
* Creates a tokenizer object
*
- * \param encode The encoding function to use when encoding token values
+ * \param encode The transcoder to use when encoding token values
* \return A pointer to the tokenizer object, or 0 if out of memory.
*
* \relatesalso FAXPP_Tokenizer
*/
-FAXPP_Tokenizer *FAXPP_create_tokenizer(FAXPP_EncodeFunction encode);
+FAXPP_Tokenizer *FAXPP_create_tokenizer(FAXPP_Transcoder encode);
/**
* Frees a tokenizer object
Modified: trunk/faxpp/include/faxpp/transcode.h
===================================================================
--- trunk/faxpp/include/faxpp/transcode.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/include/faxpp/transcode.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -65,6 +65,30 @@
(void *buffer, void *buffer_end, Char32 ch);
/**
+ * Encapsulates the information needed to both decode and encode
+ * an encoding.
+ *
+ * \see FAXPP_utf8_transcoder, FAXPP_utf16_native_transcoder
+ */
+typedef struct {
+ FAXPP_DecodeFunction decode; ///< The decode function
+ FAXPP_EncodeFunction encode; ///< The encode function
+} FAXPP_Transcoder;
+
+/**
+ * Transcoder to and from UTF-8
+ *
+ * Declared extern so that including this header only declares the object,
+ * rather than tentatively defining it in every translation unit.
+ *
+ * \see FAXPP_utf8_decode, FAXPP_utf8_encode
+ */
+extern const FAXPP_Transcoder FAXPP_utf8_transcoder;
+/**
+ * Transcoder to and from native endian UTF-16
+ *
+ * Declared extern so that including this header only declares the object,
+ * rather than tentatively defining it in every translation unit.
+ *
+ * \see FAXPP_utf16_native_decode, FAXPP_utf16_native_encode
+ */
+extern const FAXPP_Transcoder FAXPP_utf16_native_transcoder;
+
+/**
* Returns a string describing the given (built-in) decode function
* \param t
* \return a string
Modified: trunk/faxpp/src/attr_states.h
===================================================================
--- trunk/faxpp/src/attr_states.h 2008-02-27 09:44:24 UTC (rev 38)
+++ trunk/faxpp/src/attr_states.h 2008-03-03 14:32:32 UTC (rev 39)
@@ -367,3 +367,61 @@
return NO_ERROR;
}
+// Special state for parsing the content of an entity reference
+// inside an attribute value
+FAXPP_Error
+PREFIX(attr_value_state_en)(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ if(env->position >= env->buffer_end) {
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) {
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ READ_CHAR;
+
+ switch(env->current_char) {
+ case '&':
+ store_state(env);
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '<':
+ next_char(env);
+ return INVALID_CHAR_IN_ATTRIBUTE;
+ LINE_ENDINGS
+ case '\t': {
+ if(env->normalize_attrs) {
+ // Move the token to the buffer, to normalize it
+ FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0);
+ if(err != NO_ERROR) return err;
+ env->current_char = ' ';
+ }
+ break;
+ }
+ default:
+ DEFAULT_CASE;
+
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ ...
[truncated message content] |
|
From: <jp...@us...> - 2008-03-06 02:04:02
|
Revision: 40
http://faxpp.svn.sourceforge.net/faxpp/?rev=40&view=rev
Author: jpcs
Date: 2008-03-05 18:04:04 -0800 (Wed, 05 Mar 2008)
Log Message:
-----------
Implemented a user specified base URI for the files being parsed, so
that the entity callback can keep track of the origin of the entity
declaration.
Implemented the parsing of external subsets (DTDs).
"public" is a keyword in C++ - renamed "public" to "public_id" and
"system" to "system_id".
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/examples/parser_example.c
trunk/faxpp/include/faxpp/event.h
trunk/faxpp/include/faxpp/parser.h
trunk/faxpp/include/faxpp/transcode.h
trunk/faxpp/src/char_classes.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
trunk/faxpp/src/xml_tokenizer.c
trunk/faxpp/src/xml_tokenizer.h
trunk/faxpp/src/xmldecl.c
trunk/faxpp/tests/xmlconf_runner.c
Added Paths:
-----------
trunk/faxpp/examples/entity_resolver.c
trunk/faxpp/examples/entity_resolver.h
trunk/faxpp/examples/output_event.c
trunk/faxpp/examples/output_event.h
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-03-03 14:32:32 UTC (rev 39)
+++ trunk/faxpp/Makefile.am 2008-03-06 02:04:04 UTC (rev 40)
@@ -38,14 +38,20 @@
src/notationdecl.c \
src/entitydecl.c
+tokenizer_example_LDADD = libfaxpp.la
tokenizer_example_SOURCES = examples/tokenizer_example.c
-tokenizer_example_LDADD = libfaxpp.la
-parser_example_SOURCES = examples/parser_example.c
parser_example_LDADD = libfaxpp.la
+parser_example_SOURCES = \
+examples/parser_example.c \
+examples/entity_resolver.c \
+examples/output_event.c
-xmlconf_runner_SOURCES = tests/xmlconf_runner.c
xmlconf_runner_LDADD = libfaxpp.la
+xmlconf_runner_SOURCES = \
+tests/xmlconf_runner.c \
+examples/entity_resolver.c \
+examples/output_event.c
EXTRA_DIST = \
docs/Doxyfile.api \
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-03-03 14:32:32 UTC (rev 39)
+++ trunk/faxpp/Makefile.in 2008-03-06 02:04:04 UTC (rev 40)
@@ -70,13 +70,15 @@
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libfaxpp_la_LDFLAGS) $(LDFLAGS) -o $@
PROGRAMS = $(noinst_PROGRAMS)
-am_parser_example_OBJECTS = parser_example.$(OBJEXT)
+am_parser_example_OBJECTS = parser_example.$(OBJEXT) \
+ entity_resolver.$(OBJEXT) output_event.$(OBJEXT)
parser_example_OBJECTS = $(am_parser_example_OBJECTS)
parser_example_DEPENDENCIES = libfaxpp.la
am_tokenizer_example_OBJECTS = tokenizer_example.$(OBJEXT)
tokenizer_example_OBJECTS = $(am_tokenizer_example_OBJECTS)
tokenizer_example_DEPENDENCIES = libfaxpp.la
-am_xmlconf_runner_OBJECTS = xmlconf_runner.$(OBJEXT)
+am_xmlconf_runner_OBJECTS = xmlconf_runner.$(OBJEXT) \
+ entity_resolver.$(OBJEXT) output_event.$(OBJEXT)
xmlconf_runner_OBJECTS = $(am_xmlconf_runner_OBJECTS)
xmlconf_runner_DEPENDENCIES = libfaxpp.la
DEFAULT_INCLUDES = -I. -I$(top_builddir)/src@am__isrc@
@@ -259,12 +261,20 @@
src/notationdecl.c \
src/entitydecl.c
+tokenizer_example_LDADD = libfaxpp.la
tokenizer_example_SOURCES = examples/tokenizer_example.c
-tokenizer_example_LDADD = libfaxpp.la
-parser_example_SOURCES = examples/parser_example.c
parser_example_LDADD = libfaxpp.la
-xmlconf_runner_SOURCES = tests/xmlconf_runner.c
+parser_example_SOURCES = \
+examples/parser_example.c \
+examples/entity_resolver.c \
+examples/output_event.c
+
xmlconf_runner_LDADD = libfaxpp.la
+xmlconf_runner_SOURCES = \
+tests/xmlconf_runner.c \
+examples/entity_resolver.c \
+examples/output_event.c
+
EXTRA_DIST = \
docs/Doxyfile.api \
docs/header.html \
@@ -387,10 +397,12 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entity_resolver.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/output_event.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parser_example.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reference.Plo@am__quote@
@@ -578,6 +590,34 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o parser_example.obj `if test -f 'examples/parser_example.c'; then $(CYGPATH_W) 'examples/parser_example.c'; else $(CYGPATH_W) '$(srcdir)/examples/parser_example.c'; fi`
+entity_resolver.o: examples/entity_resolver.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entity_resolver.o -MD -MP -MF $(DEPDIR)/entity_resolver.Tpo -c -o entity_resolver.o `test -f 'examples/entity_resolver.c' || echo '$(srcdir)/'`examples/entity_resolver.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entity_resolver.Tpo $(DEPDIR)/entity_resolver.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/entity_resolver.c' object='entity_resolver.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entity_resolver.o `test -f 'examples/entity_resolver.c' || echo '$(srcdir)/'`examples/entity_resolver.c
+
+entity_resolver.obj: examples/entity_resolver.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entity_resolver.obj -MD -MP -MF $(DEPDIR)/entity_resolver.Tpo -c -o entity_resolver.obj `if test -f 'examples/entity_resolver.c'; then $(CYGPATH_W) 'examples/entity_resolver.c'; else $(CYGPATH_W) '$(srcdir)/examples/entity_resolver.c'; fi`
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entity_resolver.Tpo $(DEPDIR)/entity_resolver.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/entity_resolver.c' object='entity_resolver.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entity_resolver.obj `if test -f 'examples/entity_resolver.c'; then $(CYGPATH_W) 'examples/entity_resolver.c'; else $(CYGPATH_W) '$(srcdir)/examples/entity_resolver.c'; fi`
+
+output_event.o: examples/output_event.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT output_event.o -MD -MP -MF $(DEPDIR)/output_event.Tpo -c -o output_event.o `test -f 'examples/output_event.c' || echo '$(srcdir)/'`examples/output_event.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/output_event.Tpo $(DEPDIR)/output_event.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/output_event.c' object='output_event.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o output_event.o `test -f 'examples/output_event.c' || echo '$(srcdir)/'`examples/output_event.c
+
+output_event.obj: examples/output_event.c
+@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT output_event.obj -MD -MP -MF $(DEPDIR)/output_event.Tpo -c -o output_event.obj `if test -f 'examples/output_event.c'; then $(CYGPATH_W) 'examples/output_event.c'; else $(CYGPATH_W) '$(srcdir)/examples/output_event.c'; fi`
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/output_event.Tpo $(DEPDIR)/output_event.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/output_event.c' object='output_event.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o output_event.obj `if test -f 'examples/output_event.c'; then $(CYGPATH_W) 'examples/output_event.c'; else $(CYGPATH_W) '$(srcdir)/examples/output_event.c'; fi`
+
tokenizer_example.o: examples/tokenizer_example.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tokenizer_example.o -MD -MP -MF $(DEPDIR)/tokenizer_example.Tpo -c -o tokenizer_example.o `test -f 'examples/tokenizer_example.c' || echo '$(srcdir)/'`examples/tokenizer_example.c
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/tokenizer_example.Tpo $(DEPDIR)/tokenizer_example.Po
Added: trunk/faxpp/examples/entity_resolver.c
===================================================================
--- trunk/faxpp/examples/entity_resolver.c (rev 0)
+++ trunk/faxpp/examples/entity_resolver.c 2008-03-06 02:04:04 UTC (rev 40)
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "entity_resolver.h"
+
+char *resolve_paths(const char *base, unsigned int base_len, const char *path, unsigned int path_len)
+{
+ char *result = malloc(base_len + path_len + 1);
+ char *ptr = result;
+
+ strncpy(ptr, base, base_len);
+ ptr += base_len - 1;
+
+ while(ptr >= result && *ptr != '/') {
+ --ptr;
+ }
+ ++ptr;
+
+ strncpy(ptr, path, path_len);
+ ptr += path_len;
+ *ptr = 0;
+
+ return result;
+}
+
+unsigned int file_read_callback(void *userData, void *buffer, unsigned int length)
+{
+ unsigned int result = fread(buffer, 1, length, (FILE*)userData);
+ if(result < length) {
+ fclose((FILE*)userData);
+ }
+ return result;
+}
+
+FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type,
+ const FAXPP_Text *base_uri, const FAXPP_Text *system, const FAXPP_Text *public)
+{
+ FAXPP_Error err;
+ FILE *file;
+ char *path;
+
+ path = resolve_paths((char*)base_uri->ptr, base_uri->len, (char*)system->ptr, system->len);
+
+ file = fopen(path, "r");
+ if(file == 0) {
+ printf("Open of '%s' failed: %s\n", path, strerror(errno));
+ return CANT_LOCATE_EXTERNAL_ENTITY;
+ }
+
+ err = FAXPP_parse_external_entity_callback(parser, type, file_read_callback, file);
+ if(err == NO_ERROR)
+ err = FAXPP_set_base_uri_str(parser, path);
+
+ free(path);
+ return err;
+}
Added: trunk/faxpp/examples/entity_resolver.h
===================================================================
--- trunk/faxpp/examples/entity_resolver.h (rev 0)
+++ trunk/faxpp/examples/entity_resolver.h 2008-03-06 02:04:04 UTC (rev 40)
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENTITY_RESOLVER_H
+#define ENTITY_RESOLVER_H
+
+#include <faxpp/parser.h>
+
+char *resolve_paths(const char *base, unsigned int base_len, const char *path, unsigned int path_len);
+unsigned int file_read_callback(void *userData, void *buffer, unsigned int length);
+FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type,
+ const FAXPP_Text *base_uri, const FAXPP_Text *system, const FAXPP_Text *public);
+
+#endif
Added: trunk/faxpp/examples/output_event.c
===================================================================
--- trunk/faxpp/examples/output_event.c (rev 0)
+++ trunk/faxpp/examples/output_event.c 2008-03-06 02:04:04 UTC (rev 40)
@@ -0,0 +1,264 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* #include <stdio.h> */
+/* #include <stdlib.h> */
+/* #include <errno.h> */
+/* #include <string.h> */
+
+#include "output_event.h"
+
+#define SHOW_URIS 0
+#define SHOW_ENTITIES 0
+
+void
+output_text(const FAXPP_Text *text, FILE *stream)
+{
+ char *buffer = (char*)text->ptr;
+ char *buffer_end = buffer + text->len;
+
+ while(buffer < buffer_end) {
+ putc(*buffer++, stream);
+ }
+}
+
+void
+output_escaped_attr_text(const FAXPP_Text *text, FILE *stream)
+{
+ char *buffer = (char*)text->ptr;
+ char *buffer_end = buffer + text->len;
+
+ while(buffer < buffer_end) {
+ if(*buffer == '&') {
+ fprintf(stream, "&amp;");
+ }
+ else if(*buffer == '<') {
+ fprintf(stream, "&lt;");
+ }
+ else if(*buffer == '"') {
+ fprintf(stream, "&quot;");
+ }
+ else {
+ putc(*buffer, stream);
+ }
+ ++buffer;
+ }
+}
+
+void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream)
+{
+ while(atval) {
+ switch(atval->type) {
+ case CHARACTERS_EVENT:
+ output_escaped_attr_text(&atval->value, stream);
+ break;
+ case ENTITY_REFERENCE_EVENT:
+ fprintf(stream, "&");
+ output_text(&atval->name, stream);
+ fprintf(stream, ";");
+ break;
+ case DEC_CHAR_REFERENCE_EVENT:
+ fprintf(stream, "&#");
+ output_text(&atval->name, stream);
+ fprintf(stream, ";");
+ break;
+ case HEX_CHAR_REFERENCE_EVENT:
+ fprintf(stream, "&#x");
+ output_text(&atval->name, stream);
+ fprintf(stream, ";");
+ break;
+ case ENTITY_REFERENCE_START_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, "&");
+ output_text(&atval->name, stream);
+ fprintf(stream, ";(");
+#endif
+ break;
+ case ENTITY_REFERENCE_END_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, ")");
+#endif
+ break;
+ default:
+ break;
+ }
+ atval = atval->next;
+ }
+}
+
+void
+output_event(const FAXPP_Event *event, FILE *stream)
+{
+ int i;
+
+ switch(event->type) {
+ case START_DOCUMENT_EVENT:
+ if(event->version.ptr != 0) {
+ fprintf(stream, "<?xml version=\"");
+ output_text(&event->version, stream);
+ if(event->encoding.ptr != 0) {
+ fprintf(stream, "\" encoding=\"");
+ output_text(&event->encoding, stream);
+ }
+ if(event->standalone.ptr != 0) {
+ fprintf(stream, "\" standalone=\"");
+ output_text(&event->standalone, stream);
+ }
+ fprintf(stream, "\"?>");
+ }
+ break;
+ case END_DOCUMENT_EVENT:
+ break;
+ case DOCTYPE_EVENT:
+ fprintf(stream, "<!DOCTYPE ");
+
+ if(event->prefix.ptr != 0) {
+ output_text(&event->prefix, stream);
+ fprintf(stream, ":");
+ }
+ output_text(&event->name, stream);
+
+ if(event->system_id.ptr != 0) {
+ if(event->public_id.ptr != 0) {
+ fprintf(stream, " PUBLIC \"");
+ output_text(&event->public_id, stream);
+ fprintf(stream, "\" \"");
+ output_text(&event->system_id, stream);
+ fprintf(stream, "\"");
+ }
+ else {
+ fprintf(stream, " SYSTEM \"");
+ output_text(&event->system_id, stream);
+ fprintf(stream, "\"");
+ }
+ }
+ fprintf(stream, ">");
+ break;
+ case START_ELEMENT_EVENT:
+ case SELF_CLOSING_ELEMENT_EVENT:
+ fprintf(stream, "<");
+#if SHOW_URIS
+ if(event->uri.ptr != 0) {
+ fprintf(stream, "{");
+ output_text(&event->uri, stream);
+ fprintf(stream, "}");
+ } else
+#endif
+ if(event->prefix.ptr != 0) {
+ output_text(&event->prefix, stream);
+ fprintf(stream, ":");
+ }
+ output_text(&event->name, stream);
+
+ for(i = 0; i < event->attr_count; ++i) {
+ fprintf(stream, " ");
+#if SHOW_URIS
+ if(event->attrs[i].uri.ptr != 0) {
+ fprintf(stream, "{");
+ output_text(&event->attrs[i].uri, stream);
+ fprintf(stream, "}");
+ } else
+#endif
+ if(event->attrs[i].prefix.ptr != 0) {
+ output_text(&event->attrs[i].prefix, stream);
+ fprintf(stream, ":");
+ }
+ output_text(&event->attrs[i].name, stream);
+ fprintf(stream, "=\"");
+ output_attr_value(&event->attrs[i].value, stream);
+ fprintf(stream, "\"");
+ }
+
+ if(event->type == SELF_CLOSING_ELEMENT_EVENT)
+ fprintf(stream, "/>");
+ else
+ fprintf(stream, ">");
+ break;
+ case END_ELEMENT_EVENT:
+ fprintf(stream, "</");
+#if SHOW_URIS
+ if(event->uri.ptr != 0) {
+ fprintf(stream, "{");
+ output_text(&event->uri, stream);
+ fprintf(stream, "}");
+ } else
+#endif
+ if(event->prefix.ptr != 0) {
+ output_text(&event->prefix, stream);
+ fprintf(stream, ":");
+ }
+ output_text(&event->name, stream);
+ fprintf(stream, ">");
+ break;
+ case CHARACTERS_EVENT:
+ output_text(&event->value, stream);
+ break;
+ case CDATA_EVENT:
+ fprintf(stream, "<![CDATA[");
+ output_text(&event->value, stream);
+ fprintf(stream, "]]>");
+ break;
+ case IGNORABLE_WHITESPACE_EVENT:
+ output_text(&event->value, stream);
+ break;
+ case COMMENT_EVENT:
+ fprintf(stream, "<!--");
+ output_text(&event->value, stream);
+ fprintf(stream, "-->");
+ break;
+ case PI_EVENT:
+ fprintf(stream, "<?");
+ output_text(&event->name, stream);
+ if(event->value.ptr != 0) {
+ fprintf(stream, " ");
+ output_text(&event->value, stream);
+ }
+ fprintf(stream, "?>");
+ break;
+ case ENTITY_REFERENCE_EVENT:
+ fprintf(stream, "&");
+ output_text(&event->name, stream);
+ fprintf(stream, ";");
+ break;
+ case DEC_CHAR_REFERENCE_EVENT:
+ fprintf(stream, "&#");
+ output_text(&event->name, stream);
+ fprintf(stream, ";");
+ break;
+ case HEX_CHAR_REFERENCE_EVENT:
+ fprintf(stream, "&#x");
+ output_text(&event->name, stream);
+ fprintf(stream, ";");
+ break;
+ case ENTITY_REFERENCE_START_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, "&");
+ output_text(&event->name, stream);
+ fprintf(stream, ";(");
+#endif
+ break;
+ case ENTITY_REFERENCE_END_EVENT:
+#if SHOW_ENTITIES
+ fprintf(stream, ")");
+#endif
+ break;
+ case START_EXTERNAL_ENTITY_EVENT:
+ case END_EXTERNAL_ENTITY_EVENT:
+ case NO_EVENT:
+ break;
+ }
+}
+
Added: trunk/faxpp/examples/output_event.h
===================================================================
--- trunk/faxpp/examples/output_event.h (rev 0)
+++ trunk/faxpp/examples/output_event.h 2008-03-06 02:04:04 UTC (rev 40)
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OUTPUT_EVENT_H
+#define OUTPUT_EVENT_H
+
+#include <stdio.h>
+
+#include <faxpp/parser.h>
+
+void output_event(const FAXPP_Event *event, FILE *stream);
+void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream);
+
+#endif
Modified: trunk/faxpp/examples/parser_example.c
===================================================================
--- trunk/faxpp/examples/parser_example.c 2008-03-03 14:32:32 UTC (rev 39)
+++ trunk/faxpp/examples/parser_example.c 2008-03-06 02:04:04 UTC (rev 40)
@@ -21,6 +21,8 @@
#include <sys/time.h>
#include <faxpp/parser.h>
+#include "entity_resolver.h"
+#include "output_event.h"
#define BUFFER_SIZE 10
#define MSECS_IN_SECS 1000000
@@ -33,294 +35,6 @@
return (timev.tv_sec * MSECS_IN_SECS) + timev.tv_usec;
}
-void
-output_text(const FAXPP_Text *text, FILE *stream)
-{
- char *buffer = (char*)text->ptr;
- char *buffer_end = buffer + text->len;
-
- while(buffer < buffer_end) {
- putc(*buffer++, stream);
- }
-}
-
-void
-output_escaped_attr_text(const FAXPP_Text *text, FILE *stream)
-{
- char *buffer = (char*)text->ptr;
- char *buffer_end = buffer + text->len;
-
- while(buffer < buffer_end) {
- if(*buffer == '&') {
- fprintf(stream, "&amp;");
- }
- else if(*buffer == '<') {
- fprintf(stream, "&lt;");
- }
- else if(*buffer == '"') {
- fprintf(stream, "&quot;");
- }
- else {
- putc(*buffer, stream);
- }
- ++buffer;
- }
-}
-
-#define SHOW_URIS 0
-#define SHOW_ENTITIES 0
-
-void
-output_event(const FAXPP_Event *event, FILE *stream)
-{
- int i;
- FAXPP_AttrValue *atval;
-
- switch(event->type) {
- case START_DOCUMENT_EVENT:
- if(event->version.ptr != 0) {
- fprintf(stream, "<?xml version=\"");
- output_text(&event->version, stream);
- if(event->encoding.ptr != 0) {
- fprintf(stream, "\" encoding=\"");
- output_text(&event->encoding, stream);
- }
- if(event->standalone.ptr != 0) {
- fprintf(stream, "\" standalone=\"");
- output_text(&event->standalone, stream);
- }
- fprintf(stream, "\"?>");
- }
- break;
- case END_DOCUMENT_EVENT:
- break;
- case DOCTYPE_EVENT:
- fprintf(stream, "<!DOCTYPE ");
-
- if(event->prefix.ptr != 0) {
- output_text(&event->prefix, stream);
- fprintf(stream, ":");
- }
- output_text(&event->name, stream);
-
- if(event->system.ptr != 0) {
- if(event->public.ptr != 0) {
- fprintf(stream, " PUBLIC \"");
- output_text(&event->public, stream);
- fprintf(stream, "\" \"");
- output_text(&event->system, stream);
- fprintf(stream, "\"");
- }
- else {
- fprintf(stream, " SYSTEM \"");
- output_text(&event->system, stream);
- fprintf(stream, "\"");
- }
- }
- fprintf(stream, ">");
- break;
- case START_ELEMENT_EVENT:
- case SELF_CLOSING_ELEMENT_EVENT:
- fprintf(stream, "<");
-#if SHOW_URIS
- if(event->uri.ptr != 0) {
- fprintf(stream, "{");
- output_text(&event->uri, stream);
- fprintf(stream, "}");
- } else
-#endif
- if(event->prefix.ptr != 0) {
- output_text(&event->prefix, stream);
- fprintf(stream, ":");
- }
- output_text(&event->name, stream);
-
- for(i = 0; i < event->attr_count; ++i) {
- fprintf(stream, " ");
-#if SHOW_URIS
- if(event->attrs[i].uri.ptr != 0) {
- fprintf(stream, "{");
- output_text(&event->attrs[i].uri, stream);
- fprintf(stream, "}");
- } else
-#endif
- if(event->attrs[i].prefix.ptr != 0) {
- output_text(&event->attrs[i].prefix, stream);
- fprintf(stream, ":");
- }
- output_text(&event->attrs[i].name, stream);
- fprintf(stream, "=\"");
-
- atval = &event->attrs[i].value;
- while(atval) {
- switch(atval->type) {
- case CHARACTERS_EVENT:
- output_escaped_attr_text(&atval->value, stream);
- break;
- case ENTITY_REFERENCE_EVENT:
- fprintf(stream, "&");
- output_text(&atval->name, stream);
- fprintf(stream, ";");
- break;
- case DEC_CHAR_REFERENCE_EVENT:
- fprintf(stream, "&#");
- output_text(&atval->name, stream);
- fprintf(stream, ";");
- break;
- case HEX_CHAR_REFERENCE_EVENT:
- fprintf(stream, "&#x");
- output_text(&atval->name, stream);
- fprintf(stream, ";");
- break;
- case ENTITY_REFERENCE_START_EVENT:
-#if SHOW_ENTITIES
- fprintf(stream, "&");
- output_text(&atval->name, stream);
- fprintf(stream, ";(");
-#endif
- break;
- case ENTITY_REFERENCE_END_EVENT:
-#if SHOW_ENTITIES
- fprintf(stream, ")");
-#endif
- break;
- default:
- break;
- }
- atval = atval->next;
- }
-
- fprintf(stream, "\"");
- }
-
- if(event->type == SELF_CLOSING_ELEMENT_EVENT)
- fprintf(stream, "/>");
- else
- fprintf(stream, ">");
- break;
- case END_ELEMENT_EVENT:
- fprintf(stream, "</");
-#if SHOW_URIS
- if(event->uri.ptr != 0) {
- fprintf(stream, "{");
- output_text(&event->uri, stream);
- fprintf(stream, "}");
- } else
-#endif
- if(event->prefix.ptr != 0) {
- output_text(&event->prefix, stream);
- fprintf(stream, ":");
- }
- output_text(&event->name, stream);
- fprintf(stream, ">");
- break;
- case CHARACTERS_EVENT:
- output_text(&event->value, stream);
- break;
- case CDATA_EVENT:
- fprintf(stream, "<![CDATA[");
- output_text(&event->value, stream);
- fprintf(stream, "]]>");
- break;
- case IGNORABLE_WHITESPACE_EVENT:
- output_text(&event->value, stream);
- break;
- case COMMENT_EVENT:
- fprintf(stream, "<!--");
- output_text(&event->value, stream);
- fprintf(stream, "-->");
- break;
- case PI_EVENT:
- fprintf(stream, "<?");
- output_text(&event->name, stream);
- if(event->value.ptr != 0) {
- fprintf(stream, " ");
- output_text(&event->value, stream);
- }
- fprintf(stream, "?>");
- break;
- case ENTITY_REFERENCE_EVENT:
- fprintf(stream, "&");
- output_text(&event->name, stream);
- fprintf(stream, ";");
- break;
- case DEC_CHAR_REFERENCE_EVENT:
- fprintf(stream, "&#");
- output_text(&event->name, stream);
- fprintf(stream, ";");
- break;
- case HEX_CHAR_REFERENCE_EVENT:
- fprintf(stream, "&#x");
- output_text(&event->name, stream);
- fprintf(stream, ";");
- break;
- case ENTITY_REFERENCE_START_EVENT:
-#if SHOW_ENTITIES
- fprintf(stream, "&");
- output_text(&event->name, stream);
- fprintf(stream, ";(");
-#endif
- break;
- case ENTITY_REFERENCE_END_EVENT:
-#if SHOW_ENTITIES
- fprintf(stream, ")");
-#endif
- break;
- case START_EXTERNAL_ENTITY_EVENT:
- case END_EXTERNAL_ENTITY_EVENT:
- case NO_EVENT:
- break;
- }
-}
-
-char *resolve_paths(const char *base, const char *path, unsigned int path_len)
-{
- unsigned int base_len = strlen(base);
-
- char *result = malloc(base_len + path_len + 1);
- char *ptr = result;
-
- strcpy(ptr, base);
- ptr += base_len - 1;
-
- while(ptr >= result && *ptr != '/') {
- --ptr;
- }
- ++ptr;
-
- strncpy(ptr, path, path_len);
- ptr += path_len;
- *ptr = 0;
-
- return result;
-}
-
-static unsigned int file_read_callback(void *userData, void *buffer, unsigned int length)
-{
- unsigned int result = fread(buffer, 1, length, (FILE*)userData);
- if(result < length) {
- fclose((FILE*)userData);
- }
- return result;
-}
-
-static FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser,
- const FAXPP_Text *system, const FAXPP_Text *public)
-{
- FILE *file;
- char *path;
-
- path = resolve_paths((char*)userData, (char*)system->ptr, system->len);
- file = fopen(path, "r");
- if(file == 0) {
- printf("Open of '%s' failed: %s\n", path, strerror(errno));
- return CANT_LOCATE_EXTERNAL_ENTITY;
- }
- free(path);
-
- return FAXPP_parse_external_entity_callback(parser, file_read_callback, file);
-}
-
int
main(int argc, char **argv)
{
@@ -342,7 +56,7 @@
for(i = 1; i < argc; ++i) {
- FAXPP_set_external_entity_callback(parser, entity_callback, argv[i]);
+ FAXPP_set_external_entity_callback(parser, entity_callback, 0);
startTime = getTime();
@@ -353,6 +67,9 @@
}
err = FAXPP_init_parse_callback(parser, file_read_callback, file);
+ if(err == NO_ERROR)
+ err = FAXPP_set_base_uri_str(parser, argv[i]);
+
if(err != NO_ERROR) {
printf("ERROR: %s\n", FAXPP_err_to_string(err));
exit(1);
Modified: trunk/faxpp/include/faxpp/event.h
===================================================================
--- trunk/faxpp/include/faxpp/event.h 2008-03-03 14:32:32 UTC (rev 39)
+++ trunk/faxpp/include/faxpp/event.h 2008-03-06 02:04:04 UTC (rev 40)
@@ -99,8 +99,8 @@
FAXPP_Text encoding; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT
FAXPP_Text standalone; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type
- FAXPP...
[truncated message content] |
|
From: <jp...@us...> - 2008-03-11 17:30:01
|
Revision: 41
http://faxpp.svn.sourceforge.net/faxpp/?rev=41&view=rev
Author: jpcs
Date: 2008-03-11 10:28:12 -0700 (Tue, 11 Mar 2008)
Log Message:
-----------
Correctly tokenize notation declarations and attlist declarations.
Moved a test out of the inner loop in *element_content_state.
Modified Paths:
--------------
trunk/faxpp/TODO
trunk/faxpp/examples/entity_resolver.c
trunk/faxpp/examples/output_event.h
trunk/faxpp/examples/parser_example.c
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/attlistdecl.c
trunk/faxpp/src/attr_states.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/element_states.h
trunk/faxpp/src/error.c
trunk/faxpp/src/notationdecl.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
Modified: trunk/faxpp/TODO
===================================================================
--- trunk/faxpp/TODO 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/TODO 2008-03-11 17:28:12 UTC (rev 41)
@@ -8,11 +8,10 @@
xml:space value checking
Error for redefining "xml" namespace
Error for defining "xmlns" namespace
+Parse element decls correctly
+Parse parameter entities in markup correctly
Large tasks
-----------
-Entity expansion framework
-DTD internal subset parsing
-DTD external subset parsing
DTD validation
Modified: trunk/faxpp/examples/entity_resolver.c
===================================================================
--- trunk/faxpp/examples/entity_resolver.c 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/examples/entity_resolver.c 2008-03-11 17:28:12 UTC (rev 41)
@@ -60,7 +60,7 @@
file = fopen(path, "r");
if(file == 0) {
- printf("Open of '%s' failed: %s\n", path, strerror(errno));
+/* printf("Open of '%s' failed: %s\n", path, strerror(errno)); */
return CANT_LOCATE_EXTERNAL_ENTITY;
}
Modified: trunk/faxpp/examples/output_event.h
===================================================================
--- trunk/faxpp/examples/output_event.h 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/examples/output_event.h 2008-03-11 17:28:12 UTC (rev 41)
@@ -23,5 +23,6 @@
void output_event(const FAXPP_Event *event, FILE *stream);
void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream);
+void output_text(const FAXPP_Text *text, FILE *stream);
#endif
Modified: trunk/faxpp/examples/parser_example.c
===================================================================
--- trunk/faxpp/examples/parser_example.c 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/examples/parser_example.c 2008-03-11 17:28:12 UTC (rev 41)
@@ -76,14 +76,15 @@
}
while((err = FAXPP_next_event(parser)) == 0) {
- output_event(FAXPP_get_current_event(parser), stdout);
+/* output_event(FAXPP_get_current_event(parser), stdout); */
if(FAXPP_get_current_event(parser)->type == END_DOCUMENT_EVENT)
break;
}
if(err != NO_ERROR) {
- printf("%03d:%03d ERROR: %s\n", FAXPP_get_error_line(parser),
+ output_text(FAXPP_get_base_uri(parser), stdout);
+ printf(":%d:%d ERROR: %s\n", FAXPP_get_error_line(parser),
FAXPP_get_error_column(parser), FAXPP_err_to_string(err));
}
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-11 17:28:12 UTC (rev 41)
@@ -65,6 +65,8 @@
ILLEGAL_PARAMETER_ENTITY,
CANT_LOCATE_EXTERNAL_ENTITY,
DONT_PARSE_EXTERNAL_ENTITY,
+ INVALID_ATTRIBUTE_TYPE,
+ INVALID_DEFAULTDECL,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/include/faxpp/token.h 2008-03-11 17:28:12 UTC (rev 41)
@@ -79,10 +79,14 @@
ATTLISTDECL_PREFIX_TOKEN,
ATTLISTDECL_NAME_TOKEN,
- ATTLISTDECL_CONTENT_TOKEN,
+ ATTLISTDECL_ATTDEF_PREFIX_TOKEN,
+ ATTLISTDECL_ATTDEF_NAME_TOKEN,
+ ATTLISTDECL_NOTATION_NAME_TOKEN,
+ ATTLISTDECL_ENUMERATION_NAME_TOKEN,
+ ATTLISTDECL_END_TOKEN,
NOTATIONDECL_NAME_TOKEN,
- NOTATIONDECL_CONTENT_TOKEN,
+ NOTATIONDECL_END_TOKEN,
ENTITYDECL_NAME_TOKEN,
ENTITYDECL_VALUE_TOKEN,
Modified: trunk/faxpp/src/attlistdecl.c
===================================================================
--- trunk/faxpp/src/attlistdecl.c 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/src/attlistdecl.c 2008-03-11 17:28:12 UTC (rev 41)
@@ -70,18 +70,16 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_content_state;
- env->state = ws_state;
+ env->state = attlistdecl_attdef_name_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
next_char(env);
return NO_ERROR;
case '>':
- env->state = attlistdecl_content_state;
+ env->state = attlistdecl_attdef_name_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
// no next char
- token_start_position(env);
return NO_ERROR;
case ':':
env->state = attlistdecl_name_seen_colon_state1;
@@ -129,18 +127,16 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_content_state;
- env->state = ws_state;
+ env->state = attlistdecl_attdef_name_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
next_char(env);
return NO_ERROR;
case '>':
- env->state = attlistdecl_content_state;
+ env->state = attlistdecl_attdef_name_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
// no next char
- token_start_position(env);
return NO_ERROR;
default:
break;
@@ -156,21 +152,704 @@
}
FAXPP_Error
-attlistdecl_content_state(FAXPP_TokenizerEnv *env)
+attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env) // Start of an attribute definition: skips whitespace, ends the declaration on '>', otherwise begins an attribute name
{
read_char(env);
switch(env->current_char) {
+ WHITESPACE:
+ break; // stay in this state; the character is consumed by the next_char below
case '>':
base_state(env);
+ report_empty_token(ATTLISTDECL_END_TOKEN, env); // end of the ATTLIST declaration
+ break;
+ default:
+ env->state = attlistdecl_attdef_name_state2;
+ token_start_position(env); // the name (or prefix) token starts at this character
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // checked after consuming, so the state has already advanced when the error is returned
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ return NO_ERROR;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env) // Reads the rest of an attribute name; reports a prefix token on ':' or the name token on whitespace
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_atttype_state; // continue with the AttType once ws_state is done
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case ':':
+ env->state = attlistdecl_attdef_name_seen_colon_state1;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ATTDEF_PREFIX_TOKEN, env); // the text read so far was the prefix
+ next_char(env);
+ token_start_position(env); // the local name token starts after the colon
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // every other character must be an NCName character
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env) // First character after the ':' of a prefixed attribute name
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = attlistdecl_attdef_name_seen_colon_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // the local name must begin with an NCName start character
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env) // Reads the local part of a prefixed attribute name up to the whitespace that ends it
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_atttype_state; // continue with the AttType once ws_state is done
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // a second ':' is not special-cased here; it is simply tested against ncname_char
+ return INVALID_CHAR_IN_ATTLISTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+/* [54] AttType ::= StringType | TokenizedType | EnumeratedType */
+/* [55] StringType ::= 'CDATA' */
+/* [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] */
+/* | 'IDREF' [VC: IDREF] */
+/* | 'IDREFS' [VC: IDREF] */
+/* | 'ENTITY' [VC: Entity Name] */
+/* | 'ENTITIES' [VC: Entity Name] */
+/* | 'NMTOKEN' [VC: Name Token] */
+/* | 'NMTOKENS' [VC: Name Token] */
+
+/* [57] EnumeratedType ::= NotationType | Enumeration */
+/* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes] */
+/* [VC: One Notation Per Element Type] */
+/* [VC: No Notation on Empty Element] */
+/* [VC: No Duplicate Tokens] */
+/* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] */
+/* [VC: No Duplicate Tokens] */
+FAXPP_Error
+attlistdecl_atttype_state(FAXPP_TokenizerEnv *env) // Dispatches on the first character of an AttType (productions [54]-[59])
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case 'C':
+ env->state = attlistdecl_atttype_cdata_state1; // CDATA
+ break;
+ case 'I':
+ env->state = attlistdecl_atttype_id_state1; // ID, IDREF or IDREFS
+ break;
+ case 'E':
+ env->state = attlistdecl_atttype_entity_state1; // ENTITY or ENTITIES
+ break;
+ case 'N':
+ env->state = attlistdecl_atttype_nmtoken_state1; // NMTOKEN, NMTOKENS or NOTATION, decided on the next character
+ break;
+ case '(':
+ env->stored_state = attlistdecl_atttype_enumeration_name_state1; // Enumeration: first Nmtoken follows after ws_state
+ env->state = ws_state;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env); // the offending character is still consumed
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state1, 'N', 0, attlistdecl_atttype_entity_state2, INVALID_ATTRIBUTE_TYPE) // states 1-4 match the letters N, T, I, T shared by ENTITY and ENTITIES (the leading E was consumed by attlistdecl_atttype_state)
+SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state2, 'T', 0, attlistdecl_atttype_entity_state3, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state3, 'I', 0, attlistdecl_atttype_entity_state4, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state4, 'T', 0, attlistdecl_atttype_entity_state5, INVALID_ATTRIBUTE_TYPE)
+
+FAXPP_Error
+attlistdecl_atttype_entity_state5(FAXPP_TokenizerEnv *env) // Reached after the letters ENTIT: 'Y' completes ENTITY, 'I' continues towards ENTITIES
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case 'Y':
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1; // the DefaultDecl follows the keyword
+ env->state = ws_plus_state; // whitespace is required after a complete keyword, as after CDATA, ENTITIES and NOTATION
+ break;
+ case 'I':
+ env->state = attlistdecl_atttype_entities_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env); // the offending character is still consumed
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+// TBD Tokens for these - jpcs
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) // matches the final E, S of ENTITIES
+SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) // keyword complete: ws_plus_state, then the DefaultDecl
+
+FAXPP_Error
+attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) // Second character after 'N': chooses between NOTATION and NMTOKEN/NMTOKENS
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case 'O':
+ env->state = attlistdecl_atttype_notation_state1; // NOTATION
+ break;
+ case 'M':
+ env->state = attlistdecl_atttype_nmtoken_state2; // NMTOKEN or NMTOKENS
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state2, 'T', 0, attlistdecl_atttype_nmtoken_state3, INVALID_ATTRIBUTE_TYPE) // states 2-6 match the letters T, O, K, E, N that follow NM
+SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state3, 'O', 0, attlistdecl_atttype_nmtoken_state4, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state4, 'K', 0, attlistdecl_atttype_nmtoken_state5, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state5, 'E', 0, attlistdecl_atttype_nmtoken_state6, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state6, 'N', 0, attlistdecl_atttype_nmtoken_state7, INVALID_ATTRIBUTE_TYPE)
+
+FAXPP_Error
+attlistdecl_atttype_nmtoken_state7(FAXPP_TokenizerEnv *env) // After NMTOKEN: whitespace ends the keyword, 'S' turns it into NMTOKENS
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1;
+ env->state = ws_state; // one whitespace character is consumed here, so ws_state is used for the rest
+ break;
+ case 'S':
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1;
+ env->state = ws_plus_state; // no whitespace seen yet after the keyword
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+// TBD Tokens for these - jpcs
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state1, 'T', 0, attlistdecl_atttype_notation_state2, INVALID_ATTRIBUTE_TYPE) // states 1-6 match the letters T, A, T, I, O, N that follow NO
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state2, 'A', 0, attlistdecl_atttype_notation_state3, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state4, 'I', 0, attlistdecl_atttype_notation_state5, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state5, 'O', 0, attlistdecl_atttype_notation_state6, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', attlistdecl_atttype_notation_state7, ws_plus_state, INVALID_ATTRIBUTE_TYPE) // keyword complete: ws_plus_state, then the opening parenthesis
+
+FAXPP_Error
+attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env) // After NOTATION and its whitespace: only '(' is accepted, opening the list of notation names
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '(':
+ env->stored_state = attlistdecl_atttype_notation_name_state1; // first name follows after ws_state
+ env->state = ws_state;
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env) // First character of a notation name inside NOTATION ( ... )
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = attlistdecl_atttype_notation_name_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // must be an NCName start character
+ return INVALID_ATTRIBUTE_TYPE;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv *env) // Reads one further character of a notation name per call; whitespace, '|' or ')' end the name and report it
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_atttype_notation_separator_state; // a '|' or ')' must follow the whitespace
+ env->state = ws_state;
token_end_position(env);
- report_token(ATTLISTDECL_CONTENT_TOKEN, env);
+ report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
break;
+ case '|':
+ env->stored_state = attlistdecl_atttype_notation_name_state1; // another name follows
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
+ break;
+ case ')':
+ env->stored_state = attlistdecl_default_state1; // list closed: the DefaultDecl follows after ws_plus_state
+ env->state = ws_plus_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
+ break;
+ default:
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // name characters must be NCName characters
+ return INVALID_ATTRIBUTE_TYPE;
+ return NO_ERROR;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env) // After a notation name and whitespace: expects '|' (another name) or ')' (end of list)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '|':
+ env->stored_state = attlistdecl_atttype_notation_name_state1;
+ env->state = ws_state;
+ break;
+ case ')':
+ env->stored_state = attlistdecl_default_state1; // the DefaultDecl follows after ws_plus_state
+ env->state = ws_plus_state;
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_id_state1, 'D', 0, attlistdecl_atttype_id_state2, INVALID_ATTRIBUTE_TYPE) // matches the D of ID, IDREF and IDREFS (the I was consumed by attlistdecl_atttype_state)
+
+FAXPP_Error
+attlistdecl_atttype_id_state2(FAXPP_TokenizerEnv *env) // After ID: whitespace ends the keyword, 'R' continues towards IDREF/IDREFS
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1;
+ env->state = ws_state; // one whitespace character is consumed here, so ws_state is used for the rest
+ break;
+ case 'R':
+ env->state = attlistdecl_atttype_idref_state1;
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state1, 'E', 0, attlistdecl_atttype_idref_state2, INVALID_ATTRIBUTE_TYPE) // matches the E, F that follow IDR
+SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state2, 'F', 0, attlistdecl_atttype_idref_state3, INVALID_ATTRIBUTE_TYPE)
+
+FAXPP_Error
+attlistdecl_atttype_idref_state3(FAXPP_TokenizerEnv *env) // After IDREF: whitespace ends the keyword, 'S' turns it into IDREFS
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1;
+ env->state = ws_state; // one whitespace character is consumed here, so ws_state is used for the rest
+ break;
+ case 'S':
+ // TBD Tokens for these - jpcs
+ env->stored_state = attlistdecl_default_state1;
+ env->state = ws_plus_state; // no whitespace seen yet after the keyword
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+// TBD Tokens for these - jpcs
+
+SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) // states 1-4 match the letters D, A, T, A of CDATA (the C was consumed by attlistdecl_atttype_state)
+SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) // keyword complete: ws_plus_state, then the DefaultDecl
+
+FAXPP_Error
+attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) // First character of an enumeration value (an Nmtoken per production [59])
+{
+ read_char(env);
+
+ switch(env->current_char) {
LINE_ENDINGS
default:
+ env->state = attlistdecl_atttype_enumeration_name_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // tested against ncname_char, not ncname_start_char, unlike the notation name states
+ return INVALID_ATTRIBUTE_TYPE;
break;
}
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env) // Reads one further character of an enumeration value per call; whitespace, '|' or ')' end the value and report it
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = attlistdecl_atttype_enumeration_separator_state; // a '|' or ')' must follow the whitespace
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
+ break;
+ case '|':
+ env->stored_state = attlistdecl_atttype_enumeration_name_state1; // another value follows
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
+ break;
+ case ')':
+ env->stored_state = attlistdecl_default_state1; // list closed: the DefaultDecl follows after ws_plus_state
+ env->state = ws_plus_state;
+ token_end_position(env);
+ report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
+ break;
+ default:
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_ATTRIBUTE_TYPE;
+ return NO_ERROR;
+ }
+
next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env) // After an enumeration value and whitespace: expects '|' (another value) or ')' (end of list)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '|':
+ env->stored_state = attlistdecl_atttype_enumeration_name_state1;
+ env->state = ws_state;
+ break;
+ case ')':
+ env->stored_state = attlistdecl_default_state1; // the DefaultDecl follows after ws_plus_state
+ env->state = ws_plus_state;
+ break;
+ default:
+ next_char(env);
+ return INVALID_ATTRIBUTE_TYPE;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+/* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' */
+/* | (('#FIXED' S)? AttValue) [VC: Required Attribute] */
+/* [VC: Attribute Default Value Syntactically Correct] */
+/* [WFC: No < in Attribute Values] */
+/* [VC: Fixed Attribute Default] */
+FAXPP_Error
+attlistdecl_default_state1(FAXPP_TokenizerEnv *env) // Start of a DefaultDecl: '#' introduces a keyword, a quote opens a default value whose token starts after the quote
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '#':
+ env->state = attlistdecl_default_state2; // REQUIRED, IMPLIED or FIXED follows
+ next_char(env);
+ return NO_ERROR; // returns early: no value token is started for a keyword
+ case '\'':
+ env->state = attlistdecl_attvalue_apos_state;
+ break;
+ case '"':
+ env->state = attlistdecl_attvalue_quot_state;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_DEFAULTDECL;
+ }
+
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_default_state2(FAXPP_TokenizerEnv *env) // First letter after '#': selects the REQUIRED, IMPLIED or FIXED keyword chain
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case 'R':
+ env->state = attlistdecl_default_required_state1;
+ break;
+ case 'I':
+ env->state = attlistdecl_default_implied_state1;
+ break;
+ case 'F':
+ env->state = attlistdecl_default_fixed_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env); // the offending character is still consumed
+ return INVALID_DEFAULTDECL;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+// TBD Tokens for these - jpcs
+
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state1, 'M', 0, attlistdecl_default_implied_state2, INVALID_DEFAULTDECL) // states 1-6 match the letters M, P, L, I, E, D that follow #I
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state2, 'P', 0, attlistdecl_default_implied_state3, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) // keyword complete: back to the start of the next attribute definition (no whitespace state in between)
+
+SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) // states 1-7 match the letters E, Q, U, I, R, E, D that follow #R
+SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_required_state3, 'U', 0, attlistdecl_default_required_state4, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) // keyword complete: back to the start of the next attribute definition (no whitespace state in between)
+
+SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) // states 1-4 match the letters I, X, E, D that follow #F
+SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_fixed_state3, 'E', 0, attlistdecl_default_fixed_state4, INVALID_DEFAULTDECL)
+SINGLE_CHAR_STATE(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL) // keyword complete: ws_plus_state, then the quoted value
+
+FAXPP_Error
+attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env) // Entered after #FIXED and its whitespace: expects the opening quote of the value and starts the value token after it
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ env->state = attlistdecl_attvalue_apos_state;
+ break;
+ case '"':
+ env->state = attlistdecl_attvalue_quot_state;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_DEFAULTDECL;
+ }
+
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attvalue_apos_state(FAXPP_TokenizerEnv *env) // Reads an apostrophe-delimited default value, reporting it as one or more ATTRIBUTE_VALUE_TOKEN pieces
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // out of input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report the partial value first if it is non-empty
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '\'':
+ env->state = attlistdecl_attdef_name_state1; // closing quote: next attribute definition or end of declaration
+ token_end_position(env);
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env); // remember this state (presumably restored once the reference is parsed - TODO confirm)
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ATTRIBUTE_VALUE_TOKEN, env); // value text before the reference
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '<':
+ next_char(env);
+ return INVALID_CHAR_IN_ATTRIBUTE; // see [WFC: No < in Attribute Values]
+ LINE_ENDINGS
+ case '\t':
+ if(env->normalize_attrs) {
+ // Move the token to the buffer, to normalize it
+ FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0);
+ if(err != NO_ERROR) return err;
+ env->current_char = ' '; // the character is replaced by a space
+ }
+ break;
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
return NO_ERROR;
}
+FAXPP_Error
+attlistdecl_attvalue_quot_state(FAXPP_TokenizerEnv *env) // Reads a double-quote-delimited default value, reporting it as one or more ATTRIBUTE_VALUE_TOKEN pieces; mirrors attlistdecl_attvalue_apos_state
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // out of input
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report the partial value first if it is non-empty
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '"':
+ env->state = attlistdecl_attdef_name_state1;
+ token_end_position(env);
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&':
+ store_state(env);
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ATTRIBUTE_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '<':
+ next_char(env);
+ return INVALID_CHAR_IN_ATTRIBUTE;
+ LINE_ENDINGS
+ case '\t': {
+ if(env->normalize_attrs) {
+ // Move the token to the buffer, to normalize it
+ FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0);
+ if(err != NO_ERROR) return err;
+ env->current_char = ' ';
+ }
+ break;
+ }
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+
Modified: trunk/faxpp/src/attr_states.h
===================================================================
--- trunk/faxpp/src/attr_states.h 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/src/attr_states.h 2008-03-11 17:28:12 UTC (rev 41)
@@ -248,7 +248,7 @@
PREFIX(attr_value_apos_state)(FAXPP_TokenizerEnv *env)
{
while(1) {
- if(env->position >= env->buffer_end) {
+ END_CHECK_IF {
if(env->token.value.ptr) {
token_end_position(env);
if(env->token.value.len != 0) {
@@ -309,7 +309,7 @@
PREFIX(attr_value_quot_state)(FAXPP_TokenizerEnv *env)
{
while(1) {
- if(env->position >= env->buffer_end) {
+ END_CHECK_IF {
if(env->token.value.ptr) {
token_end_position(env);
if(env->token.value.len != 0) {
@@ -373,7 +373,7 @@
PREFIX(attr_value_state_en)(FAXPP_TokenizerEnv *env)
{
while(1) {
- if(env->position >= env->buffer_end) {
+ END_CHECK_IF {
if(env->token.value.ptr) {
token_end_position(env);
if(env->token.value.len != 0) {
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/src/doctype.c 2008-03-11 17:28:12 UTC (rev 41)
@@ -381,7 +381,7 @@
switch(env->current_char) {
case '\'':
- env->state = system_id_ws_state;
+ env->state = public_id_ws_state2;
token_end_position(env);
report_token(PUBID_LITERAL_TOKEN, env);
next_char(env);
@@ -425,7 +425,7 @@
switch(env->current_char) {
case '"':
- env->state = system_id_ws_state;
+ env->state = public_id_ws_state2;
token_end_position(env);
report_token(PUBID_LITERAL_TOKEN, env);
next_char(env);
@@ -462,6 +462,53 @@
}
FAXPP_Error
+public_id_ws_state2(FAXPP_TokenizerEnv *env) // Entered after the closing quote of a public id literal: expects whitespace before the system literal
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = public_id_ws_state3; // first whitespace seen; state3 skips any further whitespace
+ next_char(env);
+ break;
+ case '>':
+ if(env->stored_state == notationdecl_end_state) {
+ // Notation decls can skip the system literal
+ retrieve_state(env); // the '>' is not consumed here
+ return NO_ERROR;
+ }
+ // Fall through
+ default:
+ env->state = system_literal_start_state; // state is advanced before the error is returned; the character is not consumed
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+public_id_ws_state3(FAXPP_TokenizerEnv *env) // Skips the remaining whitespace after a public id literal, then hands over to the system literal
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env); // stay in this state
+ break;
+ case '>':
+ if(env->stored_state == notationdecl_end_state) {
+ // Notation decls can skip the system literal
+ retrieve_state(env); // the '>' is not consumed here
+ return NO_ERROR;
+ }
+ // Fall through
+ default:
+ env->state = system_literal_start_state; // the character is not consumed here
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
Modified: trunk/faxpp/src/element_states.h
===================================================================
--- trunk/faxpp/src/element_states.h 2008-03-06 02:04:04 UTC (rev 40)
+++ trunk/faxpp/src/element_states.h 2008-03-11 17:28:12 UTC (rev 41)
@@ -190,84 +190,158 @@
FAXPP_Error
PREFIX(element_content_state)(FAXPP_TokenizerEnv *env)
{
- while(1) {
- if(env->position >= env->buffer_end) {
- if(env->token.value.ptr) {
- token_end_position(env);
- if(env->token.value.len != 0) {
- report_token(CHARACTERS_TOKEN, env);
- re...
[truncated message content] |
|
From: <jp...@us...> - 2008-03-13 21:56:43
|
Revision: 42
http://faxpp.svn.sourceforge.net/faxpp/?rev=42&view=rev
Author: jpcs
Date: 2008-03-13 14:56:47 -0700 (Thu, 13 Mar 2008)
Log Message:
-----------
Added correct tokenization of element declarations.
Modified Paths:
--------------
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/elementdecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/tests/xmlconf_runner.c
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-13 21:56:47 UTC (rev 42)
@@ -67,6 +67,7 @@
DONT_PARSE_EXTERNAL_ENTITY,
INVALID_ATTRIBUTE_TYPE,
INVALID_DEFAULTDECL,
+ INVALID_ELEMENTDECL_CONTENT,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/include/faxpp/token.h 2008-03-13 21:56:47 UTC (rev 42)
@@ -75,7 +75,17 @@
ELEMENTDECL_PREFIX_TOKEN,
ELEMENTDECL_NAME_TOKEN,
- ELEMENTDECL_CONTENT_TOKEN,
+ ELEMENTDECL_EMPTY_TOKEN,
+ ELEMENTDECL_ANY_TOKEN,
+ ELEMENTDECL_PCDATA_TOKEN,
+ ELEMENTDECL_LPAR_TOKEN,
+ ELEMENTDECL_RPAR_TOKEN,
+ ELEMENTDECL_QUESTION_TOKEN,
+ ELEMENTDECL_STAR_TOKEN,
+ ELEMENTDECL_PLUS_TOKEN,
+ ELEMENTDECL_BAR_TOKEN,
+ ELEMENTDECL_COMMA_TOKEN,
+ ELEMENTDECL_END_TOKEN,
ATTLISTDECL_PREFIX_TOKEN,
ATTLISTDECL_NAME_TOKEN,
Modified: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/elementdecl.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -39,7 +39,7 @@
return NO_ERROR;
}
-#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+#define SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, return_token) \
FAXPP_Error \
name(FAXPP_TokenizerEnv *env) \
{ \
@@ -49,6 +49,7 @@
case (ch): \
if((next_stored_state) != 0) env->stored_state = (next_stored_state); \
env->state = (next_state); \
+ if((return_token) != NO_TOKEN) { report_empty_token((return_token), env); } \
next_char(env); \
break; \
LINE_ENDINGS \
@@ -59,6 +60,8 @@
return NO_ERROR; \
}
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, NO_TOKEN)
+
SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
@@ -168,14 +171,466 @@
read_char(env);
switch(env->current_char) {
+ case 'E':
+ env->state = elementdecl_empty_state1;
+ break;
+ case 'A':
+ env->state = elementdecl_any_state1;
+ break;
+ case '(':
+ env->nesting_level += 1;
+ env->stored_state = elementdecl_mixed_or_children_state;
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_LPAR_TOKEN, env);
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(elementdecl_empty_state1, 'M', 0, elementdecl_empty_state2, INVALID_ELEMENTDECL_CONTENT) // states 1-4 match the letters M, P, T, Y of EMPTY after the leading E
+SINGLE_CHAR_STATE(elementdecl_empty_state2, 'P', 0, elementdecl_empty_state3, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE(elementdecl_empty_state3, 'T', 0, elementdecl_empty_state4, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN) // keyword complete: reports ELEMENTDECL_EMPTY_TOKEN, then ws_state and the end of the declaration
+
+SINGLE_CHAR_STATE(elementdecl_any_state1, 'N', 0, elementdecl_any_state2, INVALID_ELEMENTDECL_CONTENT) // matches the letters N, Y of ANY after the leading A
+SINGLE_CHAR_STATE_RETURN(elementdecl_any_state2, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_ANY_TOKEN) // keyword complete: reports ELEMENTDECL_ANY_TOKEN, then ws_state and the end of the declaration
+
+FAXPP_Error
+elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env) // After the first '(' of a content spec: '#' means mixed content (#PCDATA), anything else a children content model
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '#':
+ env->state = elementdecl_pcdata_state1;
+ break;
+ default:
+ env->state = elementdecl_cp_name_state1; // the character is re-read by that state
+ // No next_char
+ return NO_ERROR;
+ }
+ next_char(env);
+ return NO_ERROR;
+
+}
+
+FAXPP_Error
+elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env) // Start of a content particle: either a nested '(' group or the first character of an element name
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '(':
+ env->nesting_level += 1; // one more open group
+ env->stored_state = elementdecl_cp_name_state1; // a content particle follows again after ws_state
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_LPAR_TOKEN, env);
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_cp_name_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // a name must begin with an NCName start character
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_cp_name_state2(FAXPP_TokenizerEnv *env) // Reads the rest of a content particle name and reports it when a delimiter is reached
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_cp_separator_or_end_state; // a separator or ')' must follow the whitespace
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '?':
+ case '*':
+ case '+':
+ env->state = elementdecl_cp_cardinality_state; // the indicator is left for that state to consume
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ case '|':
+ case ',':
+ case ')':
+ env->state = elementdecl_cp_separator_or_end_state; // the delimiter is left for that state to consume
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ case ':':
+ env->state = elementdecl_cp_name_seen_colon_state1;
+ token_end_position(env);
+ report_token(ELEMENTDECL_PREFIX_TOKEN, env); // the text read so far was the prefix
+ next_char(env);
+ token_start_position(env); // the local name token starts after the colon
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // every other character must be an NCName character
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_cp_name_seen_colon_state1(FAXPP_TokenizerEnv *env) // First character after the ':' of a prefixed content particle name
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_cp_name_seen_colon_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // the local name must begin with an NCName start character
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_cp_name_seen_colon_state2(FAXPP_TokenizerEnv *env) // Reads the local part of a prefixed content particle name; same delimiters as elementdecl_cp_name_state2 except that ':' is not special-cased
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_cp_separator_or_end_state; // a separator or ')' must follow the whitespace
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '?':
+ case '*':
+ case '+':
+ env->state = elementdecl_cp_cardinality_state; // the indicator is left for that state to consume
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ case '|':
+ case ',':
+ case ')':
+ env->state = elementdecl_cp_separator_or_end_state; // the delimiter is left for that state to consume
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_cp_cardinality_state(FAXPP_TokenizerEnv *env) // Reports an optional '?', '*' or '+' after a name or ')', then continues via ws_state
+{
+ read_char(env);
+
+ if(env->nesting_level == 0) // all groups are closed: only the end of the declaration can follow
+ env->stored_state = elementdecl_end_state;
+ else
+ env->stored_state = elementdecl_cp_separator_or_end_state;
+ env->state = ws_state;
+
+ switch(env->current_char) {
+ case '?':
+ report_empty_token(ELEMENTDECL_QUESTION_TOKEN, env);
+ break;
+ case '*':
+ report_empty_token(ELEMENTDECL_STAR_TOKEN, env);
+ break;
+ case '+':
+ report_empty_token(ELEMENTDECL_PLUS_TOKEN, env);
+ break;
+ default:
+ // No next_char
+ return NO_ERROR; // no indicator present: the character is left for the following state
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env) // After a content particle: expects '|' or ',' (another particle) or ')' (closes the current group)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '|':
+ env->stored_state = elementdecl_cp_name_state1;
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_BAR_TOKEN, env);
+ break;
+ case ',':
+ env->stored_state = elementdecl_cp_name_state1;
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_COMMA_TOKEN, env);
+ break;
+ case ')':
+ env->nesting_level -= 1; // one group closed
+ env->state = elementdecl_cp_cardinality_state; // an occurrence indicator may follow the group directly
+ report_empty_token(ELEMENTDECL_RPAR_TOKEN, env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
+ }
+
+ next_char(env);
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(elementdecl_pcdata_state1, 'P', 0, elementdecl_pcdata_state2, INVALID_ELEMENTDECL_CONTENT) // states 1-6 match the letters P, C, D, A, T, A of #PCDATA after the '#'
+SINGLE_CHAR_STATE(elementdecl_pcdata_state2, 'C', 0, elementdecl_pcdata_state3, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE(elementdecl_pcdata_state3, 'D', 0, elementdecl_pcdata_state4, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE(elementdecl_pcdata_state4, 'A', 0, elementdecl_pcdata_state5, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE(elementdecl_pcdata_state5, 'T', 0, elementdecl_pcdata_state6, INVALID_ELEMENTDECL_CONTENT)
+SINGLE_CHAR_STATE_RETURN(elementdecl_pcdata_state6, 'A', elementdecl_pcdata_end_or_names_state1, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_PCDATA_TOKEN) // keyword complete: reports ELEMENTDECL_PCDATA_TOKEN, then ws_state
+
+FAXPP_Error
+elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case ')':
+ env->nesting_level -= 1;
+ env->state = elementdecl_pcdata_optional_star_state;
+ report_empty_token(ELEMENTDECL_RPAR_TOKEN, env);
+ break;
+ case '|':
+ env->stored_state = elementdecl_pcdata_name_state1;
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_BAR_TOKEN, env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_optional_star_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '*':
+ report_empty_token(ELEMENTDECL_STAR_TOKEN, env);
+ next_char(env);
+ // Fall through
+ default:
+ env->stored_state = elementdecl_end_state;
+ env->state = ws_state;
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case ')':
+ env->nesting_level -= 1;
+ env->state = elementdecl_pcdata_star_state;
+ report_empty_token(ELEMENTDECL_RPAR_TOKEN, env);
+ break;
+ case '|':
+ env->stored_state = elementdecl_pcdata_name_state1;
+ env->state = ws_state;
+ report_empty_token(ELEMENTDECL_BAR_TOKEN, env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_star_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ env->stored_state = elementdecl_end_state;
+ env->state = ws_state;
+
+ switch(env->current_char) {
+ case '*':
+ report_empty_token(ELEMENTDECL_STAR_TOKEN, env);
+ next_char(env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_pcdata_name_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_name_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_pcdata_end_or_names_state2;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case ')':
+ env->state = elementdecl_pcdata_end_or_names_state2;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ case ':':
+ env->state = elementdecl_pcdata_name_seen_colon_state1;
+ token_end_position(env);
+ report_token(ELEMENTDECL_PREFIX_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_name_seen_colon_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = elementdecl_pcdata_name_seen_colon_state2;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_pcdata_name_seen_colon_state2(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->stored_state = elementdecl_pcdata_end_or_names_state2;
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case ')':
+ env->state = elementdecl_pcdata_end_or_names_state2;
+ token_end_position(env);
+ report_token(ELEMENTDECL_NAME_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ELEMENTDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_end_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
case '>':
base_state(env);
- token_end_position(env);
- report_token(ELEMENTDECL_CONTENT_TOKEN, env);
+ report_empty_token(ELEMENTDECL_END_TOKEN, env);
break;
LINE_ENDINGS
default:
- break;
+ next_char(env);
+ return INVALID_ELEMENTDECL_CONTENT;
}
next_char(env);
return NO_ERROR;
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/error.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -129,6 +129,8 @@
return "INVALID_ATTRIBUTE_TYPE";
case INVALID_DEFAULTDECL:
return "INVALID_DEFAULTDECL";
+ case INVALID_ELEMENTDECL_CONTENT:
+ return "INVALID_ELEMENTDECL_CONTENT";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/token.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -103,8 +103,28 @@
return "ELEMENTDECL_PREFIX_TOKEN";
case ELEMENTDECL_NAME_TOKEN:
return "ELEMENTDECL_NAME_TOKEN";
- case ELEMENTDECL_CONTENT_TOKEN:
- return "ELEMENTDECL_CONTENT_TOKEN";
+ case ELEMENTDECL_EMPTY_TOKEN:
+ return "ELEMENTDECL_EMPTY_TOKEN";
+ case ELEMENTDECL_ANY_TOKEN:
+ return "ELEMENTDECL_ANY_TOKEN";
+ case ELEMENTDECL_PCDATA_TOKEN:
+ return "ELEMENTDECL_PCDATA_TOKEN";
+ case ELEMENTDECL_LPAR_TOKEN:
+ return "ELEMENTDECL_LPAR_TOKEN";
+ case ELEMENTDECL_RPAR_TOKEN:
+ return "ELEMENTDECL_RPAR_TOKEN";
+ case ELEMENTDECL_QUESTION_TOKEN:
+ return "ELEMENTDECL_QUESTION_TOKEN";
+ case ELEMENTDECL_STAR_TOKEN:
+ return "ELEMENTDECL_STAR_TOKEN";
+ case ELEMENTDECL_PLUS_TOKEN:
+ return "ELEMENTDECL_PLUS_TOKEN";
+ case ELEMENTDECL_BAR_TOKEN:
+ return "ELEMENTDECL_BAR_TOKEN";
+ case ELEMENTDECL_COMMA_TOKEN:
+ return "ELEMENTDECL_COMMA_TOKEN";
+ case ELEMENTDECL_END_TOKEN:
+ return "ELEMENTDECL_END_TOKEN";
case ATTLISTDECL_PREFIX_TOKEN:
return "ATTLISTDECL_PREFIX_TOKEN";
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/tokenizer_states.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -504,6 +504,62 @@
return "elementdecl_name_seen_colon_state2";
else if(state == elementdecl_content_state)
return "elementdecl_content_state";
+ else if(state == elementdecl_empty_state1)
+ return "elementdecl_empty_state1";
+ else if(state == elementdecl_empty_state2)
+ return "elementdecl_empty_state2";
+ else if(state == elementdecl_empty_state3)
+ return "elementdecl_empty_state3";
+ else if(state == elementdecl_empty_state4)
+ return "elementdecl_empty_state4";
+ else if(state == elementdecl_any_state1)
+ return "elementdecl_any_state1";
+ else if(state == elementdecl_any_state2)
+ return "elementdecl_any_state2";
+ else if(state == elementdecl_mixed_or_children_state)
+ return "elementdecl_mixed_or_children_state";
+ else if(state == elementdecl_cp_name_state1)
+ return "elementdecl_cp_name_state1";
+ else if(state == elementdecl_cp_name_state2)
+ return "elementdecl_cp_name_state2";
+ else if(state == elementdecl_cp_name_seen_colon_state1)
+ return "elementdecl_cp_name_seen_colon_state1";
+ else if(state == elementdecl_cp_name_seen_colon_state2)
+ return "elementdecl_cp_name_seen_colon_state2";
+ else if(state == elementdecl_cp_cardinality_state)
+ return "elementdecl_cp_cardinality_state";
+ else if(state == elementdecl_cp_separator_or_end_state)
+ return "elementdecl_cp_separator_or_end_state";
+ else if(state == elementdecl_pcdata_state1)
+ return "elementdecl_pcdata_state1";
+ else if(state == elementdecl_pcdata_state2)
+ return "elementdecl_pcdata_state2";
+ else if(state == elementdecl_pcdata_state3)
+ return "elementdecl_pcdata_state3";
+ else if(state == elementdecl_pcdata_state4)
+ return "elementdecl_pcdata_state4";
+ else if(state == elementdecl_pcdata_state5)
+ return "elementdecl_pcdata_state5";
+ else if(state == elementdecl_pcdata_state6)
+ return "elementdecl_pcdata_state6";
+ else if(state == elementdecl_pcdata_end_or_names_state1)
+ return "elementdecl_pcdata_end_or_names_state1";
+ else if(state == elementdecl_pcdata_optional_star_state)
+ return "elementdecl_pcdata_optional_star_state";
+ else if(state == elementdecl_pcdata_end_or_names_state2)
+ return "elementdecl_pcdata_end_or_names_state2";
+ else if(state == elementdecl_pcdata_star_state)
+ return "elementdecl_pcdata_star_state";
+ else if(state == elementdecl_pcdata_name_state1)
+ return "elementdecl_pcdata_name_state1";
+ else if(state == elementdecl_pcdata_name_state2)
+ return "elementdecl_pcdata_name_state2";
+ else if(state == elementdecl_pcdata_name_seen_colon_state1)
+ return "elementdecl_pcdata_name_seen_colon_state1";
+ else if(state == elementdecl_pcdata_name_seen_colon_state2)
+ return "elementdecl_pcdata_name_seen_colon_state2";
+ else if(state == elementdecl_end_state)
+ return "elementdecl_end_state";
else if(state == attlistdecl_initial_state1)
return "attlistdecl_initial_state1";
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/tokenizer_states.h 2008-03-13 21:56:47 UTC (rev 42)
@@ -302,6 +302,34 @@
FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_empty_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_empty_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_empty_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_empty_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_any_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_any_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_cardinality_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_optional_star_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_star_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_pcdata_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_end_state(FAXPP_TokenizerEnv *env);
FAXPP_Error attlistdecl_initial_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error attlistdecl_initial_state2(FAXPP_TokenizerEnv *env);
@@ -426,7 +454,7 @@
*********************/
#ifdef DEBUG
-const char *state_to_string(FAXPP_StateFunction state);
+const char *FAXPP_state_to_string(FAXPP_StateFunction state);
#endif
#define read_char_no_check(env) \
@@ -441,7 +469,7 @@
} \
\
/* printf("%03d:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \
-/* state_to_string((env)->state), *(unsigned char*)(env)->position, */ \
+/* FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \
/* (env)->current_char); */ \
}
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/src/xml_parser.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -1562,7 +1562,18 @@
break;
case ELEMENTDECL_PREFIX_TOKEN:
case ELEMENTDECL_NAME_TOKEN:
- case ELEMENTDECL_CONTENT_TOKEN:
+ case ELEMENTDECL_EMPTY_TOKEN:
+ case ELEMENTDECL_ANY_TOKEN:
+ case ELEMENTDECL_PCDATA_TOKEN:
+ case ELEMENTDECL_LPAR_TOKEN:
+ case ELEMENTDECL_RPAR_TOKEN:
+ case ELEMENTDECL_QUESTION_TOKEN:
+ case ELEMENTDECL_STAR_TOKEN:
+ case ELEMENTDECL_PLUS_TOKEN:
+ case ELEMENTDECL_BAR_TOKEN:
+ case ELEMENTDECL_COMMA_TOKEN:
+ case ELEMENTDECL_END_TOKEN:
+
case ATTLISTDECL_ATTDEF_PREFIX_TOKEN:
case ATTLISTDECL_ATTDEF_NAME_TOKEN:
case ATTLISTDECL_NOTATION_NAME_TOKEN:
@@ -1615,7 +1626,17 @@
case PE_REFERENCE_TOKEN:
case ELEMENTDECL_PREFIX_TOKEN:
case ELEMENTDECL_NAME_TOKEN:
- case ELEMENTDECL_CONTENT_TOKEN:
+ case ELEMENTDECL_EMPTY_TOKEN:
+ case ELEMENTDECL_ANY_TOKEN:
+ case ELEMENTDECL_PCDATA_TOKEN:
+ case ELEMENTDECL_LPAR_TOKEN:
+ case ELEMENTDECL_RPAR_TOKEN:
+ case ELEMENTDECL_QUESTION_TOKEN:
+ case ELEMENTDECL_STAR_TOKEN:
+ case ELEMENTDECL_PLUS_TOKEN:
+ case ELEMENTDECL_BAR_TOKEN:
+ case ELEMENTDECL_COMMA_TOKEN:
+ case ELEMENTDECL_END_TOKEN:
case ATTLISTDECL_PREFIX_TOKEN:
case ATTLISTDECL_NAME_TOKEN:
case ATTLISTDECL_ATTDEF_PREFIX_TOKEN:
Modified: trunk/faxpp/tests/xmlconf_runner.c
===================================================================
--- trunk/faxpp/tests/xmlconf_runner.c 2008-03-11 17:28:12 UTC (rev 41)
+++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-13 21:56:47 UTC (rev 42)
@@ -23,10 +23,13 @@
#include "../examples/entity_resolver.h"
#include "../examples/output_event.h"
-void error(FAXPP_Error err, unsigned int line, unsigned int column)
+void error(const FAXPP_Parser *parser, FAXPP_Error err)
{
+ unsigned int line = FAXPP_get_error_line(parser);
+
if(line != 0) {
- fprintf(stderr, "%03d:%03d FAXPP_Error: %s\n", line, column, FAXPP_err_to_string(err));
+ output_text(FAXPP_get_base_uri(parser), stderr);
+ fprintf(stderr, ":%03d:%03d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err));
} else {
fprintf(stderr, "FAXPP_Error: %s\n", FAXPP_err_to_string(err));
}
@@ -140,10 +143,10 @@
FAXPP_set_external_entity_callback(parser, entity_callback, 0);
err = FAXPP_init_parse_file(parser, file);
- if(err != NO_ERROR) error(err, 0, 0);
+ if(err != NO_ERROR) error(parser, err);
err = FAXPP_set_base_uri_str(parser, testFile);
- if(err != NO_ERROR) error(err, 0, 0);
+ if(err != NO_ERROR) error(parser, err);
while((err = FAXPP_next_event(parser)) == 0) {
event = FAXPP_get_current_event(parser);
@@ -275,8 +278,7 @@
}
}
- if(err != NO_ERROR) error(err, FAXPP_get_error_line(parser),
- FAXPP_get_error_column(parser));
+ if(err != NO_ERROR) error(parser, err);
cleanup:
printf("\n\nTests run: %d, Tests passed: %d, Tests skipped: %d, Tests failed: %d (%.3f%%)\n", test_passes + test_failures + test_skips,
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|
|
From: <jp...@us...> - 2008-03-17 10:41:08
|
Revision: 45
http://faxpp.svn.sourceforge.net/faxpp/?rev=45&view=rev
Author: jpcs
Date: 2008-03-17 03:41:03 -0700 (Mon, 17 Mar 2008)
Log Message:
-----------
Fully parse parameter entity references in element and attlist
declarations.
Fixed a number of miscellaneous bugs.
Modified Paths:
--------------
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/src/attlistdecl.c
trunk/faxpp/src/elementdecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/pi.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_tokenizer.c
trunk/faxpp/src/xml_tokenizer.h
trunk/faxpp/src/xmldecl.c
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-03-15 10:59:42 UTC (rev 44)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-17 10:41:03 UTC (rev 45)
@@ -70,6 +70,7 @@
INVALID_ELEMENTDECL_CONTENT,
INVALID_CONDITIONAL_SECTION,
IMPROPER_NESTING_OF_ENTITY,
+ PARAMETER_ENTITY_IN_INTERNAL_SUBSET,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/src/attlistdecl.c
===================================================================
--- trunk/faxpp/src/attlistdecl.c 2008-03-15 10:59:42 UTC (rev 44)
+++ trunk/faxpp/src/attlistdecl.c 2008-03-17 10:41:03 UTC (rev 45)
@@ -45,9 +45,62 @@
SINGLE_CHAR_STATE(attlistdecl_initial_state3, 'L', 0, attlistdecl_initial_state4, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(attlistdecl_initial_state4, 'I', 0, attlistdecl_initial_state5, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(attlistdecl_initial_state5, 'S', 0, attlistdecl_initial_state6, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(attlistdecl_initial_state6, 'T', attlistdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(attlistdecl_initial_state6, 'T', 0, attlistdecl_name_ws_state1, INVALID_DTD_DECL)
FAXPP_Error
+attlistdecl_name_ws_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = attlistdecl_name_ws_state2;
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_name_state1;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_name_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
attlistdecl_name_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -73,10 +126,11 @@
switch(env->current_char) {
WHITESPACE:
- env->state = attlistdecl_attdef_name_state1;
+ case '%':
+ env->state = attlistdecl_attdef_name_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
case '>':
env->state = attlistdecl_attdef_name_state1;
@@ -130,10 +184,11 @@
switch(env->current_char) {
WHITESPACE:
- env->state = attlistdecl_attdef_name_state1;
+ case '%':
+ env->state = attlistdecl_attdef_name_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
case '>':
env->state = attlistdecl_attdef_name_state1;
@@ -155,18 +210,74 @@
}
FAXPP_Error
-attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env)
+attlistdecl_attdef_name_ws_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
WHITESPACE:
+ env->state = attlistdecl_attdef_name_ws_state2;
break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
case '>':
base_state(env);
report_empty_token(ATTLISTDECL_END_TOKEN, env);
break;
default:
+ env->state = attlistdecl_attdef_name_state1;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ next_char(env);
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attdef_name_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_attdef_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '>':
+ base_state(env);
+ report_empty_token(ATTLISTDECL_END_TOKEN, env);
+ break;
+ LINE_ENDINGS
+ default:
env->state = attlistdecl_attdef_name_state2;
token_start_position(env);
next_char(env);
@@ -187,11 +298,11 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_atttype_state;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_atttype_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
case ':':
env->state = attlistdecl_attdef_name_seen_colon_state1;
@@ -239,11 +350,11 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_atttype_state;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_atttype_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
default:
break;
@@ -258,6 +369,59 @@
return NO_ERROR;
}
+FAXPP_Error
+attlistdecl_atttype_ws_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = attlistdecl_atttype_ws_state2;
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_state;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_state;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
/* [54] AttType ::= StringType | TokenizedType | EnumeratedType */
/* [55] StringType ::= 'CDATA' */
/* [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] */
@@ -294,8 +458,7 @@
env->state = attlistdecl_atttype_nmtoken_state1;
break;
case '(':
- env->stored_state = attlistdecl_atttype_enumeration_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_enumeration_name_ws_state;
break;
LINE_ENDINGS
default:
@@ -319,8 +482,7 @@
switch(env->current_char) {
case 'Y':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_state;
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, env);
break;
case 'I':
@@ -337,7 +499,7 @@
}
SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE)
-SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN)
+SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', 0, attlistdecl_default_ws_state1, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN)
FAXPP_Error
attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env)
@@ -374,13 +536,14 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, env);
+ // No next_char
+ return NO_ERROR;
break;
case 'S':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, env);
break;
default:
@@ -397,17 +560,68 @@
SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE)
SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state4, 'I', 0, attlistdecl_atttype_notation_state5, INVALID_ATTRIBUTE_TYPE)
SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state5, 'O', 0, attlistdecl_atttype_notation_state6, INVALID_ATTRIBUTE_TYPE)
-SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', attlistdecl_atttype_notation_state7, ws_plus_state, INVALID_ATTRIBUTE_TYPE)
+SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', 0, attlistdecl_atttype_notation_ws_state1, INVALID_ATTRIBUTE_TYPE)
FAXPP_Error
-attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env)
+attlistdecl_atttype_notation_ws_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
+ WHITESPACE:
+ env->state = attlistdecl_atttype_notation_ws_state2;
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_notation_lpar_state;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_atttype_notation_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_notation_lpar_state;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+FAXPP_Error
+attlistdecl_atttype_notation_lpar_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
case '(':
- env->stored_state = attlistdecl_atttype_notation_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_notation_name_ws_state;
break;
default:
next_char(env);
@@ -419,6 +633,32 @@
}
FAXPP_Error
+attlistdecl_atttype_notation_name_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_notation_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -443,20 +683,19 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_atttype_notation_separator_state;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_atttype_notation_separator_ws_state;
token_end_position(env);
report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
- break;
+ // No next_char
+ return NO_ERROR;
case '|':
- env->stored_state = attlistdecl_atttype_notation_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_notation_name_ws_state;
token_end_position(env);
report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
break;
case ')':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env);
break;
@@ -472,18 +711,42 @@
}
FAXPP_Error
+attlistdecl_atttype_notation_separator_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_notation_separator_state;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
case '|':
- env->stored_state = attlistdecl_atttype_notation_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_notation_name_ws_state;
break;
case ')':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
break;
default:
next_char(env);
@@ -503,10 +766,11 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_ID_TOKEN, env);
- break;
+ // No next_char
+ return NO_ERROR;
case 'R':
env->state = attlistdecl_atttype_idref_state1;
break;
@@ -529,13 +793,13 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_IDREF_TOKEN, env);
- break;
+ // No next_char
+ return NO_ERROR;
case 'S':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
report_empty_token(ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, env);
break;
default:
@@ -550,9 +814,35 @@
SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE)
SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE)
SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE)
-SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN)
+SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', 0, attlistdecl_default_ws_state1, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN)
FAXPP_Error
+attlistdecl_atttype_enumeration_name_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_enumeration_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -562,7 +852,7 @@
default:
env->state = attlistdecl_atttype_enumeration_name_state2;
next_char(env);
- if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ if(env->current_char != ':' && (FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
return INVALID_ATTRIBUTE_TYPE;
break;
}
@@ -577,23 +867,24 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = attlistdecl_atttype_enumeration_separator_state;
- env->state = ws_state;
+ case '%':
+ env->state = attlistdecl_atttype_enumeration_separator_ws_state;
token_end_position(env);
report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
- break;
+ // No next_char
+ return NO_ERROR;
case '|':
- env->stored_state = attlistdecl_atttype_enumeration_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_enumeration_name_ws_state;
token_end_position(env);
report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
break;
case ')':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
token_end_position(env);
report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env);
break;
+ case ':':
+ break;
default:
next_char(env);
if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
@@ -606,18 +897,42 @@
}
FAXPP_Error
+attlistdecl_atttype_enumeration_separator_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_atttype_enumeration_separator_state;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
case '|':
- env->stored_state = attlistdecl_atttype_enumeration_name_state1;
- env->state = ws_state;
+ env->state = attlistdecl_atttype_enumeration_name_ws_state;
break;
case ')':
- env->stored_state = attlistdecl_default_state1;
- env->state = ws_plus_state;
+ env->state = attlistdecl_default_ws_state1;
break;
default:
next_char(env);
@@ -628,6 +943,59 @@
return NO_ERROR;
}
+FAXPP_Error
+attlistdecl_default_ws_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = attlistdecl_default_ws_state2;
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_default_state1;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+attlistdecl_default_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = attlistdecl_default_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
/* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' */
/* | (('#FIXED' S)? AttValue) [VC: Required Attribute] */
/* [VC: Attribute Default Value Syntactically Correct] */
@@ -690,7 +1058,7 @@
SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL)
-SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN)
+SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_ws_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN)
SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL)
@@ -698,7 +1066,7 @@
SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL)
-SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN)
+SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_ws_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN)
SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL)
SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL)
@@ -748,7 +1116,7 @@
switch(env->current_char) {
case '\'':
- env->state = attlistdecl_attdef_name_state1;
+ env->state = attlistdecl_attdef_name_ws_state1;
token_end_position(env);
report_token(ATTRIBUTE_VALUE_TOKEN, env);
next_char(env);
@@ -807,7 +1175,7 @@
switch(env->current_char) {
case '"':
- env->state = attlistdecl_attdef_name_state1;
+ env->state = attlistdecl_attdef_name_ws_state1;
token_end_position(env);
report_token(ATTRIBUTE_VALUE_TOKEN, env);
next_char(env);
Modified: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c 2008-03-15 10:59:42 UTC (rev 44)
+++ trunk/faxpp/src/elementdecl.c 2008-03-17 10:41:03 UTC (rev 45)
@@ -67,29 +67,57 @@
SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state1, INVALID_DTD_DECL)
FAXPP_Error
-elementdecl_name_ws_state(FAXPP_TokenizerEnv *env)
+elementdecl_name_ws_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
WHITESPACE:
+ env->state = elementdecl_name_ws_state2;
next_char(env);
break;
case '%':
- // TBD only for external subset - jpcs
store_state(env);
env->state = parameter_entity_reference_in_markup_state;
next_char(env);
token_start_position(env);
- return NO_ERROR;
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
default:
env->state = elementdecl_name_state1;
token_start_position(env);
// No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_name_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = elementdecl_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
}
return NO_ERROR;
}
@@ -120,11 +148,11 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = elementdecl_content_state;
- env->state = ws_state;
+ case '%':
+ env->state = elementdecl_content_ws_state1;
token_end_position(env);
report_token(ELEMENTDECL_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
case ':':
env->state = elementdecl_name_seen_colon_state1;
@@ -172,11 +200,11 @@
switch(env->current_char) {
WHITESPACE:
- env->stored_state = elementdecl_content_state;
- env->state = ws_state;
+ case '%':
+ env->state = elementdecl_content_ws_state1;
token_end_position(env);
report_token(ELEMENTDECL_NAME_TOKEN, env);
- next_char(env);
+ // No next_char
return NO_ERROR;
default:
break;
@@ -192,6 +220,59 @@
}
FAXPP_Error
+elementdecl_content_ws_state1(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ env->state = elementdecl_content_ws_state2;
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = elementdecl_content_state;
+ token_start_position(env);
+ // No next_char
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+elementdecl_content_ws_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+ default:
+ env->state = elementdecl_content_state;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
elementdecl_content_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -205,8 +286,7 @@
break;
case '(':
env->elemdecl_content_level += 1;
- env->stored_state = elementdecl_mixed_or_children_state;
- env->state = ws_state;
+ env->state = elementdecl_mixed_or_children_ws_state;
report_empty_token(ELEMENTDECL_LPAR_TOKEN, env);
break;
LINE_ENDINGS
@@ -221,12 +301,38 @@
SINGLE_CHAR_STATE(elementdecl_empty_state1, 'M', 0, elementdecl_empty_state2, INVALID_ELEMENTDECL_CONTENT)
SINGLE_CHAR_STATE(elementdecl_empty_state2, 'P', 0, elementdecl_empty_state3, INVALID_ELEMENTDECL_CONTENT)
SINGLE_CHAR_STATE(elementdecl_empty_state3, 'T', 0, elementdecl_empty_state4, INVALID_ELEMENTDECL_CONTENT)
-SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN)
+SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', 0, elementdecl_end_ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN)
SINGLE_CHAR_STATE(elementdecl_a...
[truncated message content] |
|
From: <jp...@us...> - 2008-03-20 01:56:14
|
Revision: 47
http://faxpp.svn.sourceforge.net/faxpp/?rev=47&view=rev
Author: jpcs
Date: 2008-03-19 18:56:21 -0700 (Wed, 19 Mar 2008)
Log Message:
-----------
Added recognition of parameter entities in entity and notation
declarations.
Raise errors for redeclaration of "xml" and "xmlns" namespaces.
Validate the value of "xml:space" attributes.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/TODO
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/src/doctype.c
trunk/faxpp/src/entitydecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/notationdecl.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/tests/xmlconf_runner.c
Added Paths:
-----------
trunk/faxpp/src/system_public_states.h
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/Makefile.am 2008-03-20 01:56:21 UTC (rev 47)
@@ -37,7 +37,8 @@
src/attlistdecl.c \
src/notationdecl.c \
src/entitydecl.c \
-src/conditional.c
+src/conditional.c \
+src/system_public_states.h
tokenizer_example_LDADD = libfaxpp.la
tokenizer_example_SOURCES = examples/tokenizer_example.c
@@ -46,13 +47,17 @@
parser_example_SOURCES = \
examples/parser_example.c \
examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
xmlconf_runner_LDADD = libfaxpp.la
xmlconf_runner_SOURCES = \
tests/xmlconf_runner.c \
examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
EXTRA_DIST = \
docs/Doxyfile.api \
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/Makefile.in 2008-03-20 01:56:21 UTC (rev 47)
@@ -260,7 +260,8 @@
src/attlistdecl.c \
src/notationdecl.c \
src/entitydecl.c \
-src/conditional.c
+src/conditional.c \
+src/system_public_states.h
tokenizer_example_LDADD = libfaxpp.la
tokenizer_example_SOURCES = examples/tokenizer_example.c
@@ -268,13 +269,17 @@
parser_example_SOURCES = \
examples/parser_example.c \
examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
xmlconf_runner_LDADD = libfaxpp.la
xmlconf_runner_SOURCES = \
tests/xmlconf_runner.c \
examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
EXTRA_DIST = \
docs/Doxyfile.api \
Modified: trunk/faxpp/TODO
===================================================================
--- trunk/faxpp/TODO 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/TODO 2008-03-20 01:56:21 UTC (rev 47)
@@ -1,17 +1,11 @@
Small tasks
-----------
-Don't accept Namespace 1.1 undefines in XML 1.0 mode
Normalize line endings in element character content / PI values / comment values
Accept XML 1.1 line endings as whitespace
-Handle "xml" namespace properly
-xml:space value checking
-Error for redefining "xml" namespace
-Error for defining "xmlns" namespace
-Parse element decls correctly
-Parse parameter entities in markup correctly
Large tasks
-----------
+Attribute default values
DTD validation
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-20 01:56:21 UTC (rev 47)
@@ -75,7 +75,9 @@
ELEMENT_NAME_MISMATCH,
NO_URI_FOR_PREFIX,
- DUPLICATE_ATTRIBUTES
+ DUPLICATE_ATTRIBUTES,
+ INVALID_NAMESPACE_DECLARATION,
+ INVALID_XMLSPACE_VALUE
} FAXPP_Error;
/**
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/doctype.c 2008-03-20 01:56:21 UTC (rev 47)
@@ -198,12 +198,10 @@
WHITESPACE:
break;
case 'S':
- env->stored_state = doctype_internal_subset_start_state;
- env->state = system_id_initial_state1;
+ env->state = doctype_system_id_initial_state1;
break;
case 'P':
- env->stored_state = doctype_internal_subset_start_state;
- env->state = public_id_initial_state1;
+ env->state = doctype_public_id_initial_state1;
break;
case '[':
env->state = internal_subset_state;
@@ -223,292 +221,15 @@
return NO_ERROR;
}
-SINGLE_CHAR_STATE(system_id_initial_state1, 'Y', 0, system_id_initial_state2, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state2, 'S', 0, system_id_initial_state3, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state3, 'T', 0, system_id_initial_state4, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state4, 'E', 0, system_id_initial_state5, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state5, 'M', 0, system_id_ws_state, INVALID_SYSTEM_ID)
+#define PREFIX(name) doctype_ ## name
+#define END_STATE doctype_internal_subset_start_state
-FAXPP_Error
-system_id_ws_state(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
+#include "system_public_states.h"
- switch(env->current_char) {
- WHITESPACE:
- env->state = system_literal_start_state;
- next_char(env);
- break;
- default:
- env->state = system_literal_start_state;
- return EXPECTING_WHITESPACE;
- }
- return NO_ERROR;
-}
+#undef END_STATE
+#undef PREFIX
FAXPP_Error
-system_literal_start_state(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
-
- switch(env->current_char) {
- WHITESPACE:
- next_char(env);
- return NO_ERROR;
- case '"':
- env->state = system_literal_quot_state;
- break;
- case '\'':
- env->state = system_literal_apos_state;
- break;
- default:
- next_char(env);
- return EXPECTING_SYSTEM_LITERAL;
- }
- next_char(env);
- token_start_position(env);
- return NO_ERROR;
-}
-
-FAXPP_Error
-system_literal_apos_state(FAXPP_TokenizerEnv *env)
-{
- while(1) {
- read_char(env);
-
- switch(env->current_char) {
- case '\'':
- retrieve_state(env);
- token_end_position(env);
- report_token(SYSTEM_LITERAL_TOKEN, env);
- next_char(env);
- return NO_ERROR;
- LINE_ENDINGS
- default:
- if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
- next_char(env);
- return RESTRICTED_CHAR;
- }
- break;
- }
- next_char(env);
- }
-
- // Never happens
- return NO_ERROR;
-}
-
-FAXPP_Error
-system_literal_quot_state(FAXPP_TokenizerEnv *env)
-{
- while(1) {
- read_char(env);
-
- switch(env->current_char) {
- case '"':
- retrieve_state(env);
- token_end_position(env);
- report_token(SYSTEM_LITERAL_TOKEN, env);
- next_char(env);
- return NO_ERROR;
- LINE_ENDINGS
- default:
- if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
- next_char(env);
- return RESTRICTED_CHAR;
- }
- break;
- }
- next_char(env);
- }
-
- // Never happens
- return NO_ERROR;
-}
-
-SINGLE_CHAR_STATE(public_id_initial_state1, 'U', 0, public_id_initial_state2, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state2, 'B', 0, public_id_initial_state3, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state3, 'L', 0, public_id_initial_state4, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state4, 'I', 0, public_id_initial_state5, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state5, 'C', 0, public_id_ws_state, INVALID_PUBLIC_ID)
-
-FAXPP_Error
-public_id_ws_state(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
-
- switch(env->current_char) {
- WHITESPACE:
- env->state = pubid_literal_start_state;
- next_char(env);
- break;
- default:
- env->state = pubid_literal_start_state;
- return EXPECTING_WHITESPACE;
- }
- return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_start_state(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
-
- switch(env->current_char) {
- WHITESPACE:
- next_char(env);
- return NO_ERROR;
- case '"':
- env->state = pubid_literal_quot_state;
- break;
- case '\'':
- env->state = pubid_literal_apos_state;
- break;
- default:
- next_char(env);
- return EXPECTING_PUBID_LITERAL;
- }
- next_char(env);
- token_start_position(env);
- return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_apos_state(FAXPP_TokenizerEnv *env)
-{
- while(1) {
- read_char(env);
-
- switch(env->current_char) {
- case '\'':
- env->state = public_id_ws_state2;
- token_end_position(env);
- report_token(PUBID_LITERAL_TOKEN, env);
- next_char(env);
- return NO_ERROR;
- // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
- LINE_ENDINGS
- // A-Z
- case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
- case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
- case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
- case 0x58: case 0x59: case 0x5A:
- // a-z
- case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
- case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
- case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
- case 0x78: case 0x79: case 0x7A:
- // 0-9
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
- case '9':
- case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
- case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
- case '%':
- // Valid PubidChar
- break;
- default:
- next_char(env);
- return INVALID_CHAR_IN_PUBID_LITERAL;
- }
- next_char(env);
- }
-
- // Never happens
- return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_quot_state(FAXPP_TokenizerEnv *env)
-{
- while(1) {
- read_char(env);
-
- switch(env->current_char) {
- case '"':
- env->state = public_id_ws_state2;
- token_end_position(env);
- report_token(PUBID_LITERAL_TOKEN, env);
- next_char(env);
- return NO_ERROR;
- // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
- LINE_ENDINGS
- // A-Z
- case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
- case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
- case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
- case 0x58: case 0x59: case 0x5A:
- // a-z
- case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
- case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
- case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
- case 0x78: case 0x79: case 0x7A:
- // 0-9
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
- case '9':
- case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
- case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
- case '%': case '\'':
- // Valid PubidChar
- break;
- default:
- next_char(env);
- return INVALID_CHAR_IN_PUBID_LITERAL;
- }
- next_char(env);
- }
-
- // Never happens
- return NO_ERROR;
-}
-
-FAXPP_Error
-public_id_ws_state2(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
-
- switch(env->current_char) {
- WHITESPACE:
- env->state = public_id_ws_state3;
- next_char(env);
- break;
- case '>':
- if(env->stored_state == notationdecl_end_state) {
- // Notation decls can skip the system literal
- retrieve_state(env);
- return NO_ERROR;
- }
- // Fall through
- default:
- env->state = system_literal_start_state;
- return EXPECTING_WHITESPACE;
- }
- return NO_ERROR;
-}
-
-FAXPP_Error
-public_id_ws_state3(FAXPP_TokenizerEnv *env)
-{
- read_char(env);
-
- switch(env->current_char) {
- WHITESPACE:
- next_char(env);
- break;
- case '>':
- if(env->stored_state == notationdecl_end_state) {
- // Notation decls can skip the system literal
- retrieve_state(env);
- return NO_ERROR;
- }
- // Fall through
- default:
- env->state = system_literal_start_state;
- break;
- }
- return NO_ERROR;
-}
-
-FAXPP_Error
doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
Modified: trunk/faxpp/src/entitydecl.c
===================================================================
--- trunk/faxpp/src/entitydecl.c 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/entitydecl.c 2008-03-20 01:56:21 UTC (rev 47)
@@ -111,12 +111,10 @@
token_start_position(env);
return NO_ERROR;
case 'S':
- env->stored_state = entitydecl_ws_state;
- env->state = system_id_initial_state1;
+ env->state = entitydecl_system_id_initial_state1;
break;
case 'P':
- env->stored_state = entitydecl_ws_state;
- env->state = public_id_initial_state1;
+ env->state = entitydecl_public_id_initial_state1;
break;
LINE_ENDINGS
default:
@@ -127,6 +125,16 @@
return NO_ERROR;
}
+#define PREFIX(name) entitydecl_ ## name
+#define END_STATE entitydecl_ws_state
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef END_STATE
+#undef PREFIX
+
FAXPP_Error
entitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
{
@@ -422,12 +430,10 @@
token_start_position(env);
return NO_ERROR;
case 'S':
- env->stored_state = paramentitydecl_end_state;
- env->state = system_id_initial_state1;
+ env->state = paramentitydecl_system_id_initial_state1;
break;
case 'P':
- env->stored_state = paramentitydecl_end_state;
- env->state = public_id_initial_state1;
+ env->state = paramentitydecl_public_id_initial_state1;
break;
LINE_ENDINGS
default:
@@ -438,6 +444,16 @@
return NO_ERROR;
}
+#define PREFIX(name) paramentitydecl_ ## name
+#define END_STATE paramentitydecl_end_state
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef END_STATE
+#undef PREFIX
+
FAXPP_Error
paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
{
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/error.c 2008-03-20 01:56:21 UTC (rev 47)
@@ -137,6 +137,10 @@
return "IMPROPER_NESTING_OF_ENTITY";
case PARAMETER_ENTITY_IN_INTERNAL_SUBSET:
return "PARAMETER_ENTITY_IN_INTERNAL_SUBSET";
+ case INVALID_NAMESPACE_DECLARATION:
+ return "INVALID_NAMESPACE_DECLARATION";
+ case INVALID_XMLSPACE_VALUE:
+ return "INVALID_XMLSPACE_VALUE";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/notationdecl.c
===================================================================
--- trunk/faxpp/src/notationdecl.c 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/notationdecl.c 2008-03-20 01:56:21 UTC (rev 47)
@@ -97,12 +97,10 @@
switch(env->current_char) {
case 'S':
- env->stored_state = notationdecl_end_state;
- env->state = system_id_initial_state1;
+ env->state = notationdecl_system_id_initial_state1;
break;
case 'P':
- env->stored_state = notationdecl_end_state;
- env->state = public_id_initial_state1;
+ env->state = notationdecl_public_id_initial_state1;
break;
LINE_ENDINGS
default:
@@ -113,6 +111,18 @@
return NO_ERROR;
}
+#define PREFIX(name) notationdecl_ ## name
+#define END_STATE notationdecl_end_state
+#define SKIP_SYSTEM_LITERAL
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef SKIP_SYSTEM_LITERAL
+#undef END_STATE
+#undef PREFIX
+
FAXPP_Error
notationdecl_end_state(FAXPP_TokenizerEnv *env)
{
Added: trunk/faxpp/src/system_public_states.h
===================================================================
--- trunk/faxpp/src/system_public_states.h (rev 0)
+++ trunk/faxpp/src/system_public_states.h 2008-03-20 01:56:21 UTC (rev 47)
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The including file must set up these macros before including this file:
+// PREFIX(name) - required
+// END_STATE - required
+// SKIP_SYSTEM_LITERAL - optional, only tested with #ifdef
+// ALLOW_PARAMETER_ENTITIES - optional, only tested with #ifdef
+
+#define SP_SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) /* generates a state function called `name` that expects the single character `ch` */ \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); /* passing 0 leaves env->stored_state untouched */ \
+ env->state = (next_state); /* match: continue in `next_state` */ \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS /* macro defined elsewhere; its expansion is not visible here */ \
+ default: \
+ next_char(env); /* next_char is still called on a mismatch; env->state is not changed on this path */ \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state1), 'Y', 0, PREFIX(system_id_initial_state2), INVALID_SYSTEM_ID) // states 1-5 match 'Y', 'S', 'T', 'E', 'M' in turn; presumably the leading 'S' of SYSTEM is matched by the includer - confirm
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state2), 'S', 0, PREFIX(system_id_initial_state3), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state3), 'T', 0, PREFIX(system_id_initial_state4), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state4), 'E', 0, PREFIX(system_id_initial_state5), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state5), 'M', 0, PREFIX(system_id_ws_state), INVALID_SYSTEM_ID) // after 'M' the tokenizer continues in system_id_ws_state
+
+FAXPP_Error
+PREFIX(system_id_ws_state)(FAXPP_TokenizerEnv *env) // State reached after the final 'M' of the SYSTEM keyword states; expects whitespace
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ env->state = PREFIX(system_literal_start_state); // whitespace found: go look for the opening quote
+ next_char(env);
+ break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+ default:
+ env->state = PREFIX(system_literal_start_state); // state still advances; next_char is not called on this path
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_start_state)(FAXPP_TokenizerEnv *env) // Expects the quote character that opens the system literal
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ next_char(env);
+ return NO_ERROR; // env->state is not changed, so this state runs again
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+ case '"':
+ env->state = PREFIX(system_literal_quot_state); // literal body is scanned by the matching quote-specific state
+ break;
+ case '\'':
+ env->state = PREFIX(system_literal_apos_state);
+ break;
+ default:
+ next_char(env); // env->state is not changed on this error path
+ return EXPECTING_SYSTEM_LITERAL;
+ }
+ next_char(env); // reached via the break in the two quote cases
+ token_start_position(env); // called after next_char, so presumably the token starts after the opening quote - confirm
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_apos_state)(FAXPP_TokenizerEnv *env) // Scans the body of a system literal that was opened with an apostrophe
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'': // closing apostrophe: report the literal and continue in END_STATE
+ env->state = END_STATE; // END_STATE is supplied by the including file
+ token_end_position(env); // called before next_char, so presumably the closing quote is excluded from the token - confirm
+ report_token(SYSTEM_LITERAL_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '#': // '#' inside the literal is reported as INVALID_SYSTEM_ID; env->state is left unchanged
+ next_char(env);
+ return INVALID_SYSTEM_ID;
+ LINE_ENDINGS // macro defined elsewhere; its expansion is not visible here
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character has none of the env->non_restricted_char flag bits
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env); // accepted character: advance and keep scanning
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_quot_state)(FAXPP_TokenizerEnv *env) // Scans the body of a system literal that was opened with a double quote
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ case '"': // closing double quote: report the literal and continue in END_STATE
+ env->state = END_STATE; // END_STATE is supplied by the including file
+ token_end_position(env); // called before next_char, so presumably the closing quote is excluded from the token - confirm
+ report_token(SYSTEM_LITERAL_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '#': // '#' inside the literal is reported as INVALID_SYSTEM_ID; env->state is left unchanged
+ next_char(env);
+ return INVALID_SYSTEM_ID;
+ LINE_ENDINGS // macro defined elsewhere; its expansion is not visible here
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character has none of the env->non_restricted_char flag bits
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env); // accepted character: advance and keep scanning
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state1), 'U', 0, PREFIX(public_id_initial_state2), INVALID_PUBLIC_ID) // states 1-5 match 'U', 'B', 'L', 'I', 'C' in turn; presumably the leading 'P' of PUBLIC is matched by the includer - confirm
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state2), 'B', 0, PREFIX(public_id_initial_state3), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state3), 'L', 0, PREFIX(public_id_initial_state4), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state4), 'I', 0, PREFIX(public_id_initial_state5), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state5), 'C', 0, PREFIX(public_id_ws_state), INVALID_PUBLIC_ID) // after 'C' the tokenizer continues in public_id_ws_state
+
+FAXPP_Error
+PREFIX(public_id_ws_state)(FAXPP_TokenizerEnv *env) // State reached after the final 'C' of the PUBLIC keyword states; expects whitespace
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ env->state = PREFIX(pubid_literal_start_state); // whitespace found: go look for the opening quote
+ next_char(env);
+ break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+ default:
+ env->state = PREFIX(pubid_literal_start_state); // state still advances; next_char is not called on this path
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_start_state)(FAXPP_TokenizerEnv *env) // Expects the quote character that opens the public id literal
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ next_char(env);
+ return NO_ERROR; // env->state is not changed, so this state runs again
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+ case '"':
+ env->state = PREFIX(pubid_literal_quot_state); // literal body is scanned by the matching quote-specific state
+ break;
+ case '\'':
+ env->state = PREFIX(pubid_literal_apos_state);
+ break;
+ default:
+ next_char(env); // env->state is not changed on this error path
+ return EXPECTING_PUBID_LITERAL;
+ }
+ next_char(env); // reached via the break in the two quote cases
+ token_start_position(env); // called after next_char, so presumably the token starts after the opening quote - confirm
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_apos_state)(FAXPP_TokenizerEnv *env) // Scans the body of a public id literal that was opened with an apostrophe
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'': // closing apostrophe: report the literal and continue in public_id_ws_state2
+ env->state = PREFIX(public_id_ws_state2);
+ token_end_position(env); // called before next_char, so presumably the closing quote is excluded from the token - confirm
+ report_token(PUBID_LITERAL_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+ LINE_ENDINGS // macro defined elsewhere; its expansion is not visible here
+ // A-Z
+ case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+ case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
+ case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+ case 0x58: case 0x59: case 0x5A:
+ // a-z
+ case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
+ case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+ case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
+ case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
+ case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
+ case '%': // the apostrophe is not listed here because it terminates the literal in this state
+ // Valid PubidChar
+ break;
+ default:
+ next_char(env); // env->state is not changed on this error path
+ return INVALID_CHAR_IN_PUBID_LITERAL;
+ }
+ next_char(env); // accepted character: advance and keep scanning
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_quot_state)(FAXPP_TokenizerEnv *env) // Scans the body of a public id literal that was opened with a double quote
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ case '"': // closing double quote: report the literal and continue in public_id_ws_state2
+ env->state = PREFIX(public_id_ws_state2);
+ token_end_position(env); // called before next_char, so presumably the closing quote is excluded from the token - confirm
+ report_token(PUBID_LITERAL_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+ LINE_ENDINGS // macro defined elsewhere; its expansion is not visible here
+ // A-Z
+ case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+ case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
+ case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+ case 0x58: case 0x59: case 0x5A:
+ // a-z
+ case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
+ case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+ case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
+ case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
+ case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
+ case '%': case '\'': // unlike the apostrophe-delimited variant, the apostrophe is accepted as a literal character here
+ // Valid PubidChar
+ break;
+ default:
+ next_char(env); // env->state is not changed on this error path
+ return INVALID_CHAR_IN_PUBID_LITERAL;
+ }
+ next_char(env); // accepted character: advance and keep scanning
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(public_id_ws_state2)(FAXPP_TokenizerEnv *env) // State entered after the public id literal's closing quote; expects whitespace
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ env->state = PREFIX(public_id_ws_state3); // first whitespace seen: any further whitespace is handled by public_id_ws_state3
+ next_char(env);
+ break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+#ifdef SKIP_SYSTEM_LITERAL
+ case '>': // this case is compiled in only when the includer defines SKIP_SYSTEM_LITERAL
+ // Notation decls can skip the system literal
+ env->state = END_STATE; // next_char is not called here, so END_STATE sees the '>' itself
+ return NO_ERROR;
+#endif
+ default:
+ env->state = PREFIX(system_literal_start_state); // state still advances; next_char is not called on this path
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(public_id_ws_state3)(FAXPP_TokenizerEnv *env) // State entered from public_id_ws_state2 once whitespace has been seen after the public id literal
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // macro defined elsewhere; presumably expands to the whitespace case labels - confirm
+ next_char(env); // env->state is not changed, so this state runs again
+ break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+ case '%': // this case is compiled in only when the includer defines ALLOW_PARAMETER_ENTITIES
+ store_state(env); // presumably saves the current state so it can be resumed after the reference - confirm
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) // no error if any of these three flags is set
+ return NO_ERROR;
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; // error is returned after env->state has already been switched
+#endif
+#ifdef SKIP_SYSTEM_LITERAL
+ case '>': // this case is compiled in only when the includer defines SKIP_SYSTEM_LITERAL
+ // Notation decls can skip the system literal
+ env->state = END_STATE; // next_char is not called here, so END_STATE sees the '>' itself
+ return NO_ERROR;
+#endif
+ default:
+ env->state = PREFIX(system_literal_start_state); // no error and no next_char: the same character is examined by system_literal_start_state
+ break;
+ }
+ return NO_ERROR;
+}
+
+#undef SP_SINGLE_CHAR_STATE
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/tokenizer_states.c 2008-03-20 01:56:21 UTC (rev 47)
@@ -450,47 +450,47 @@
else if(state == external_subset_decl_state)
return "external_subset_decl_state";
- else if(state == system_id_initial_state1)
- return "system_id_initial_state1";
- else if(state == system_id_initial_state2)
- return "system_id_initial_state2";
- else if(state == system_id_initial_state3)
- return "system_id_initial_state3";
- else if(state == system_id_initial_state4)
- return "system_id_initial_state4";
- else if(state == system_id_initial_state5)
- return "system_id_initial_state5";
- else if(state == system_id_ws_state)
- return "system_id_ws_state";
- else if(state == system_literal_start_state)
- return "system_literal_start_state";
- else if(state == system_literal_apos_state)
- return "system_literal_apos_state";
- else if(state == system_literal_quot_state)
- return "system_literal_quot_state";
+ else if(state == doctype_system_id_initial_state1)
+ return "doctype_system_id_initial_state1";
+ else if(state == doctype_system_id_initial_state2)
+ return "doctype_system_id_initial_state2";
+ else if(state == doctype_system_id_initial_state3)
+ return "doctype_system_id_initial_state3";
+ else if(state == doctype_system_id_initial_state4)
+ return "doctype_system_id_initial_state4";
+ else if(state == doctype_system_id_initial_state5)
+ return "doctype_system_id_initial_state5";
+ else if(state == doctype_system_id_ws_state)
+ return "doctype_system_id_ws_state";
+ else if(state == doctype_system_literal_start_state)
+ return "doctype_system_literal_start_state";
+ else if(state == doctyp...
[truncated message content] |