[Faxpp-devel] SF.net SVN: faxpp: [38] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
|
From: <jp...@us...> - 2008-02-27 09:44:20
|
Revision: 38
http://faxpp.svn.sourceforge.net/faxpp/?rev=38&view=rev
Author: jpcs
Date: 2008-02-27 01:44:24 -0800 (Wed, 27 Feb 2008)
Log Message:
-----------
Added code to tokenize entity declarations.
Modified Paths:
--------------
trunk/faxpp/Makefile.am
trunk/faxpp/Makefile.in
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/attlistdecl.c
trunk/faxpp/src/doctype.c
trunk/faxpp/src/elementdecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/notationdecl.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
Added Paths:
-----------
trunk/faxpp/src/entitydecl.c
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38)
@@ -35,7 +35,8 @@
src/doctype.c \
src/elementdecl.c \
src/attlistdecl.c \
-src/notationdecl.c
+src/notationdecl.c \
+src/entitydecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38)
@@ -64,7 +64,7 @@
element.lo error.lo event.lo pi.lo reference.lo token.lo \
tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \
xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \
- notationdecl.lo
+ notationdecl.lo entitydecl.lo
libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS)
libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -256,7 +256,8 @@
src/doctype.c \
src/elementdecl.c \
src/attlistdecl.c \
-src/notationdecl.c
+src/notationdecl.c \
+src/entitydecl.c
tokenizer_example_SOURCES = examples/tokenizer_example.c
tokenizer_example_LDADD = libfaxpp.la
@@ -386,6 +387,7 @@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@
@@ -555,6 +557,13 @@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c
+entitydecl.lo: src/entitydecl.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entitydecl.lo -MD -MP -MF $(DEPDIR)/entitydecl.Tpo -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entitydecl.Tpo $(DEPDIR)/entitydecl.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/entitydecl.c' object='entitydecl.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c
+
parser_example.o: examples/parser_example.c
@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c
@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -55,6 +55,8 @@
INVALID_CHAR_IN_ELEMENTDECL_NAME,
INVALID_CHAR_IN_ATTLISTDECL_NAME,
INVALID_CHAR_IN_NOTATIONDECL_NAME,
+ INVALID_CHAR_IN_ENTITYDECL_NAME,
+ INVALID_ENTITYDECL,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/include/faxpp/token.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -71,6 +71,7 @@
SYSTEM_LITERAL_TOKEN,
PUBID_LITERAL_TOKEN,
+ NDATA_NAME_TOKEN,
ELEMENTDECL_PREFIX_TOKEN,
ELEMENTDECL_NAME_TOKEN,
@@ -83,6 +84,11 @@
NOTATIONDECL_NAME_TOKEN,
NOTATIONDECL_CONTENT_TOKEN,
+ ENTITYDECL_NAME_TOKEN,
+ ENTITYDECL_VALUE_TOKEN,
+ ENTITYDECL_END_TOKEN,
+ PARAMENTITYDECL_NAME_TOKEN,
+
END_OF_BUFFER_TOKEN = 99
} FAXPP_TokenType;
Modified: trunk/faxpp/src/attlistdecl.c
===================================================================
--- trunk/faxpp/src/attlistdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/attlistdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -163,6 +163,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(ATTLISTDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/doctype.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -398,6 +398,9 @@
case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
case '%':
@@ -439,6 +442,9 @@
case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
case 0x78: case 0x79: case 0x7A:
+ // 0-9
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+ case '9':
case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
case '%': case '\'':
@@ -544,7 +550,7 @@
env->state = comment_start_state2;
break;
case 'E':
- env->state = elementdecl_initial_state1;
+ env->state = elementdecl_or_entitydecl_state;
break;
case 'A':
env->state = attlistdecl_initial_state1;
Modified: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/elementdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -17,6 +17,28 @@
#include "tokenizer_states.h"
#include "char_classes.h"
+FAXPP_Error // Returns NO_ERROR, or INVALID_DTD_DECL when the character is neither 'L' nor 'N'.
+elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env) // Entered from doctype.c after an 'E'; picks the ELEMENT or the ENTITY keyword path.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case 'L': // "EL": the elementdecl states go on to match 'E', 'M', 'E', 'N', 'T'
+ env->state = elementdecl_initial_state1;
+ next_char(env);
+ break;
+ case 'N': // "EN": the entitydecl states go on to match 'T', 'I', 'T', 'Y'
+ env->state = entitydecl_initial_state1;
+ next_char(env);
+ break;
+ LINE_ENDINGS // NOTE(review): macro defined outside this diff; presumably supplies line-ending case labels - confirm
+ default:
+ next_char(env); // next_char is called before the error is returned
+ return INVALID_DTD_DECL;
+ }
+ return NO_ERROR;
+}
+
#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
FAXPP_Error \
name(FAXPP_TokenizerEnv *env) \
@@ -37,12 +59,11 @@
return NO_ERROR; \
}
-SINGLE_CHAR_STATE(elementdecl_initial_state1, 'L', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state2, 'E', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state3, 'M', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state4, 'E', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state5, 'N', 0, elementdecl_initial_state6, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state6, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
FAXPP_Error
elementdecl_name_state1(FAXPP_TokenizerEnv *env)
@@ -149,6 +170,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(ELEMENTDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Added: trunk/faxpp/src/entitydecl.c
===================================================================
--- trunk/faxpp/src/entitydecl.c (rev 0)
+++ trunk/faxpp/src/entitydecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -0,0 +1,574 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tokenizer_states.h"
+#include "char_classes.h"
+
+#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) /* Defines state function name(), which accepts only the character ch. */ \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (ch): \
+ if((next_stored_state) != 0) env->stored_state = (next_stored_state); /* 0 leaves env->stored_state untouched */ \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ LINE_ENDINGS /* NOTE(review): macro defined outside this file; presumably line-ending case labels - confirm */ \
+ default: /* any other character: call next_char, then return the error argument */ \
+ next_char(env); \
+ return (error); \
+ } \
+ return NO_ERROR; \
+}
+
+SINGLE_CHAR_STATE(entitydecl_initial_state1, 'T', 0, entitydecl_initial_state2, INVALID_DTD_DECL) // 'T' of "ENTITY"; 'E' and 'N' are matched in doctype.c and elementdecl_or_entitydecl_state
+SINGLE_CHAR_STATE(entitydecl_initial_state2, 'I', 0, entitydecl_initial_state3, INVALID_DTD_DECL) // 'I'
+SINGLE_CHAR_STATE(entitydecl_initial_state3, 'T', 0, entitydecl_initial_state4, INVALID_DTD_DECL) // 'T'
+SINGLE_CHAR_STATE(entitydecl_initial_state4, 'Y', entitydecl_param_or_general_state, ws_plus_state, INVALID_DTD_DECL) // 'Y'; ws_plus_state presumably requires whitespace and then resumes stored_state - confirm
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_CHAR_IN_ENTITYDECL_NAME when the character fails the ncname_start_char flag test.
+entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env) // Chooses between a parameter entity declaration ('%') and a general one.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '%':
+ env->stored_state = paramentitydecl_name_state1; // state to resume once ws_plus_state is done - presumably after whitespace; confirm
+ env->state = ws_plus_state;
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default: // anything else is taken as the first character of a general entity name
+ env->state = entitydecl_name_state;
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // checked after next_char; env->state has already been switched
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FAXPP_Error // Returns NO_ERROR once the name token is reported, or INVALID_CHAR_IN_ENTITYDECL_NAME.
+entitydecl_name_state(FAXPP_TokenizerEnv *env) // Scans the remainder of a general entity name until whitespace ends it.
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // NOTE(review): WHITESPACE is a macro defined outside this file; presumably ends in a whitespace case label - confirm
+ env->stored_state = entitydecl_content_state; // the entity definition is parsed after ws_state
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // every non-whitespace character must pass the ncname_char flag test
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL when the character starts neither a literal nor an 'S'/'P' keyword.
+entitydecl_content_state(FAXPP_TokenizerEnv *env) // Dispatches on the first character of a general entity's definition.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '"': // double-quoted literal value
+ env->state = entitydecl_value_quot_state;
+ next_char(env);
+ token_start_position(env); // called after next_char, i.e. after the opening quote
+ return NO_ERROR;
+ case '\'': // single-quoted literal value
+ env->state = entitydecl_value_apos_state;
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case 'S': // handed to the system_id states - presumably the SYSTEM keyword; confirm
+ env->stored_state = entitydecl_ws_state; // state to resume after the external ID
+ env->state = system_id_initial_state1;
+ break;
+ case 'P': // handed to the public_id states - presumably the PUBLIC keyword; confirm
+ env->stored_state = entitydecl_ws_state;
+ env->state = public_id_initial_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ next_char(env); // reached only from the 'S' and 'P' cases
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR after reporting a token, PREMATURE_END_OF_BUFFER, or RESTRICTED_CHAR.
+entitydecl_value_apos_state(FAXPP_TokenizerEnv *env) // Scans a single-quoted general entity value, reporting it in ENTITYDECL_VALUE_TOKEN pieces.
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // input exhausted before the closing quote
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report what has been scanned so far as a partial value
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '\'': // closing quote: report the value, then expect the end of the declaration
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&': // report the text so far, then hand over to reference_state
+ store_state(env); // NOTE(review): presumably saves the current state for resumption - confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%': // report the text so far, then hand over to parameter_entity_reference_state
+ store_state(env);
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character fails the non_restricted_char flag test
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR after reporting a token, PREMATURE_END_OF_BUFFER, or RESTRICTED_CHAR.
+entitydecl_value_quot_state(FAXPP_TokenizerEnv *env) // Scans a double-quoted general entity value, reporting it in ENTITYDECL_VALUE_TOKEN pieces.
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // input exhausted before the closing quote
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report what has been scanned so far as a partial value
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '"': // closing quote: report the value, then expect the end of the declaration
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&': // report the text so far, then hand over to reference_state
+ store_state(env); // NOTE(review): presumably saves the current state for resumption - confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%': // report the text so far, then hand over to parameter_entity_reference_state
+ store_state(env);
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character fails the non_restricted_char flag test
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or EXPECTING_WHITESPACE when neither whitespace nor '>' follows.
+entitydecl_ws_state(FAXPP_TokenizerEnv *env) // Installed by entitydecl_content_state as the state to resume after an external ID.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // NOTE(review): WHITESPACE is a macro defined outside this file; presumably ends in a whitespace case label - confirm
+ env->state = entitydecl_ndata_or_end_state;
+ next_char(env);
+ break;
+ case '>': // no next_char here: entitydecl_end_state handles the '>' and reports the end token
+ env->state = entitydecl_end_state;
+ break;
+ default: // the state is moved on before the error is returned; next_char is not called
+ env->state = entitydecl_ndata_or_end_state;
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL on any character other than whitespace, '>' or 'N'.
+entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env) // After an external ID and whitespace: expects either the end of the declaration or the NDATA keyword.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // further whitespace: stay in this state
+ next_char(env);
+ break;
+ case '>': // no next_char here: entitydecl_end_state handles the '>'
+ env->state = entitydecl_end_state;
+ break;
+ case 'N': // first letter of "NDATA"; the remaining letters are matched by entitydecl_ndata_state1..4
+ env->state = entitydecl_ndata_state1;
+ next_char(env);
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+
+SINGLE_CHAR_STATE(entitydecl_ndata_state1, 'D', 0, entitydecl_ndata_state2, INVALID_ENTITYDECL) // 'D' of "NDATA"; the 'N' is matched in entitydecl_ndata_or_end_state
+SINGLE_CHAR_STATE(entitydecl_ndata_state2, 'A', 0, entitydecl_ndata_state3, INVALID_ENTITYDECL) // 'A'
+SINGLE_CHAR_STATE(entitydecl_ndata_state3, 'T', 0, entitydecl_ndata_state4, INVALID_ENTITYDECL) // 'T'
+SINGLE_CHAR_STATE(entitydecl_ndata_state4, 'A', entitydecl_ndata_name_state1, ws_plus_state, INVALID_ENTITYDECL) // 'A'; ws_plus_state presumably requires whitespace and then resumes stored_state - confirm
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL when the character fails the ncname_start_char flag test.
+entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env) // Accepts the first character of the name that follows NDATA.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = entitydecl_ndata_name_state2; // the rest of the name is scanned there
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // checked after next_char; env->state has already been switched
+ return INVALID_ENTITYDECL;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR once NDATA_NAME_TOKEN is reported, or INVALID_ENTITYDECL.
+entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env) // Scans the remainder of the NDATA name until whitespace or '>' ends it.
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // name ended by whitespace: report it and call next_char
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(NDATA_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '>': // name ended by '>': report it; no next_char, entitydecl_end_state handles the '>'
+ env->state = entitydecl_end_state;
+ token_end_position(env);
+ report_token(NDATA_NAME_TOKEN, env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // every other character must pass the ncname_char flag test
+ return INVALID_ENTITYDECL;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL on any character other than whitespace or '>'.
+entitydecl_end_state(FAXPP_TokenizerEnv *env) // Skips whitespace and expects the '>' that closes a general entity declaration.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // stay in this state
+ next_char(env);
+ break;
+ case '>':
+ base_state(env); // NOTE(review): helper defined outside this file; presumably restores the enclosing state - confirm
+ report_empty_token(ENTITYDECL_END_TOKEN, env);
+ next_char(env);
+ token_start_position(env); // token start is reset after the next_char call for '>'
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_CHAR_IN_ENTITYDECL_NAME when the character fails the ncname_start_char flag test.
+paramentitydecl_name_state1(FAXPP_TokenizerEnv *env) // Accepts the first character of a parameter entity name; installed as stored_state by the '%' branch of entitydecl_param_or_general_state.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ default:
+ env->state = paramentitydecl_name_state2; // the rest of the name is scanned there
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) // checked after next_char; env->state has already been switched
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ break;
+ }
+
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR once the name token is reported, or INVALID_CHAR_IN_ENTITYDECL_NAME.
+paramentitydecl_name_state2(FAXPP_TokenizerEnv *env) // Scans the remainder of a parameter entity name until whitespace ends it.
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // NOTE(review): WHITESPACE is a macro defined outside this file; presumably ends in a whitespace case label - confirm
+ env->stored_state = paramentitydecl_content_state; // the entity definition is parsed after ws_state
+ env->state = ws_state;
+ token_end_position(env);
+ report_token(PARAMENTITYDECL_NAME_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ default:
+ break;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) // every non-whitespace character must pass the ncname_char flag test
+ return INVALID_CHAR_IN_ENTITYDECL_NAME;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL when the character starts neither a literal nor an 'S'/'P' keyword.
+paramentitydecl_content_state(FAXPP_TokenizerEnv *env) // Dispatches on the first character of a parameter entity's definition.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '"': // double-quoted literal value
+ env->state = paramentitydecl_value_quot_state;
+ next_char(env);
+ token_start_position(env); // called after next_char, i.e. after the opening quote
+ return NO_ERROR;
+ case '\'': // single-quoted literal value
+ env->state = paramentitydecl_value_apos_state;
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case 'S': // handed to the system_id states - presumably the SYSTEM keyword; confirm
+ env->stored_state = paramentitydecl_end_state; // unlike the general entity path, resumes straight at the end state (no NDATA states)
+ env->state = system_id_initial_state1;
+ break;
+ case 'P': // handed to the public_id states - presumably the PUBLIC keyword; confirm
+ env->stored_state = paramentitydecl_end_state;
+ env->state = public_id_initial_state1;
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ next_char(env); // reached only from the 'S' and 'P' cases
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR after reporting a token, PREMATURE_END_OF_BUFFER, or RESTRICTED_CHAR.
+paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env) // Scans a single-quoted parameter entity value; pieces are reported as ENTITYDECL_VALUE_TOKEN, the same type the general entity states use.
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // input exhausted before the closing quote
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report what has been scanned so far as a partial value
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '\'': // closing quote: report the value, then expect the end of the declaration
+ env->state = paramentitydecl_end_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&': // report the text so far, then hand over to reference_state
+ store_state(env); // NOTE(review): presumably saves the current state for resumption - confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%': // report the text so far, then hand over to parameter_entity_reference_state
+ store_state(env);
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character fails the non_restricted_char flag test
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR after reporting a token, PREMATURE_END_OF_BUFFER, or RESTRICTED_CHAR.
+paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env) // Scans a double-quoted parameter entity value; pieces are reported as ENTITYDECL_VALUE_TOKEN, the same type the general entity states use.
+{
+ while(1) {
+ if(env->position >= env->buffer_end) { // input exhausted before the closing quote
+ if(env->token.value.ptr) {
+ token_end_position(env);
+ if(env->token.value.len != 0) { // report what has been scanned so far as a partial value
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ return NO_ERROR;
+ }
+ }
+ token_start_position(env);
+ return PREMATURE_END_OF_BUFFER;
+ }
+
+ read_char_no_check(env); // the end-of-buffer test was done explicitly above
+
+ switch(env->current_char) {
+ case '"': // closing quote: report the value, then expect the end of the declaration
+ env->state = paramentitydecl_end_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ return NO_ERROR;
+ case '&': // report the text so far, then hand over to reference_state
+ store_state(env); // NOTE(review): presumably saves the current state for resumption - confirm
+ env->state = reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ case '%': // report the text so far, then hand over to parameter_entity_reference_state
+ store_state(env);
+ env->state = parameter_entity_reference_state;
+ token_end_position(env);
+ report_token(ENTITYDECL_VALUE_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ LINE_ENDINGS
+ default:
+ if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { // character fails the non_restricted_char flag test
+ next_char(env);
+ return RESTRICTED_CHAR;
+ }
+ break;
+ }
+ next_char(env);
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error // Returns NO_ERROR, or INVALID_ENTITYDECL on any character other than whitespace or '>'.
+paramentitydecl_end_state(FAXPP_TokenizerEnv *env) // Skips whitespace and expects the '>' that closes a parameter entity declaration.
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE: // stay in this state
+ next_char(env);
+ break;
+ case '>':
+ base_state(env); // NOTE(review): helper defined outside this file; presumably restores the enclosing state - confirm
+ report_empty_token(ENTITYDECL_END_TOKEN, env); // same end token type as the general entity path
+ next_char(env);
+ token_start_position(env); // token start is reset after the next_char call for '>'
+ break;
+ default:
+ next_char(env);
+ return INVALID_ENTITYDECL;
+ }
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/error.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -105,6 +105,10 @@
return "INVALID_CHAR_IN_ATTLISTDECL_NAME";
case INVALID_CHAR_IN_NOTATIONDECL_NAME:
return "INVALID_CHAR_IN_NOTATIONDECL_NAME";
+ case INVALID_CHAR_IN_ENTITYDECL_NAME:
+ return "INVALID_CHAR_IN_ENTITYDECL_NAME";
+ case INVALID_ENTITYDECL:
+ return "INVALID_ENTITYDECL";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/notationdecl.c
===================================================================
--- trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/notationdecl.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -98,6 +98,7 @@
switch(env->current_char) {
case '>':
base_state(env);
+ token_end_position(env);
report_token(NOTATIONDECL_CONTENT_TOKEN, env);
break;
LINE_ENDINGS
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/token.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -96,6 +96,8 @@
return "SYSTEM_LITERAL_TOKEN";
case PUBID_LITERAL_TOKEN:
return "PUBID_LITERAL_TOKEN";
+ case NDATA_NAME_TOKEN:
+ return "NDATA_NAME_TOKEN";
case ELEMENTDECL_PREFIX_TOKEN:
return "ELEMENTDECL_PREFIX_TOKEN";
@@ -116,6 +118,15 @@
case NOTATIONDECL_CONTENT_TOKEN:
return "NOTATIONDECL_CONTENT_TOKEN";
+ case ENTITYDECL_NAME_TOKEN:
+ return "ENTITYDECL_NAME_TOKEN";
+ case ENTITYDECL_VALUE_TOKEN:
+ return "ENTITYDECL_VALUE_TOKEN";
+ case ENTITYDECL_END_TOKEN:
+ return "ENTITYDECL_END_TOKEN";
+ case PARAMENTITYDECL_NAME_TOKEN:
+ return "PARAMENTITYDECL_NAME_TOKEN";
+
case NO_TOKEN:
break;
}
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/tokenizer_states.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -460,10 +460,10 @@
else if(state == pubid_literal_quot_state)
return "pubid_literal_quot_state";
+ else if(state == elementdecl_or_entitydecl_state)
+ return "elementdecl_or_entitydecl_state";
else if(state == elementdecl_initial_state1)
return "elementdecl_initial_state1";
- else if(state == elementdecl_initial_state1)
- return "elementdecl_initial_state1";
else if(state == elementdecl_initial_state2)
return "elementdecl_initial_state2";
else if(state == elementdecl_initial_state3)
@@ -472,8 +472,6 @@
return "elementdecl_initial_state4";
else if(state == elementdecl_initial_state5)
return "elementdecl_initial_state5";
- else if(state == elementdecl_initial_state6)
- return "elementdecl_initial_state6";
else if(state == elementdecl_name_state1)
return "elementdecl_name_state1";
else if(state == elementdecl_name_state2)
@@ -533,6 +531,59 @@
else if(state == notationdecl_content_state)
return "notationdecl_content_state";
+ else if(state == entitydecl_initial_state1)
+ return "entitydecl_initial_state1";
+ else if(state == entitydecl_initial_state1)
+ return "entitydecl_initial_state1";
+ else if(state == entitydecl_initial_state2)
+ return "entitydecl_initial_state2";
+ else if(state == entitydecl_initial_state3)
+ return "entitydecl_initial_state3";
+ else if(state == entitydecl_initial_state4)
+ return "entitydecl_initial_state4";
+ else if(state == entitydecl_param_or_general_state)
+ return "entitydecl_param_or_general_state";
+
+ else if(state == entitydecl_name_state)
+ return "entitydecl_name_state";
+ else if(state == entitydecl_content_state)
+ return "entitydecl_content_state";
+ else if(state == entitydecl_value_apos_state)
+ return "entitydecl_value_apos_state";
+ else if(state == entitydecl_value_quot_state)
+ return "entitydecl_value_quot_state";
+ else if(state == entitydecl_ws_state)
+ return "entitydecl_ws_state";
+ else if(state == entitydecl_ndata_or_end_state)
+ return "entitydecl_ndata_or_end_state";
+ else if(state == entitydecl_ndata_state1)
+ return "entitydecl_ndata_state1";
+ else if(state == entitydecl_ndata_state2)
+ return "entitydecl_ndata_state2";
+ else if(state == entitydecl_ndata_state3)
+ return "entitydecl_ndata_state3";
+ else if(state == entitydecl_ndata_state4)
+ return "entitydecl_ndata_state4";
+ else if(state == entitydecl_ndata_name_state1)
+ return "entitydecl_ndata_name_state1";
+ else if(state == entitydecl_ndata_name_state2)
+ return "entitydecl_ndata_name_state2";
+ else if(state == entitydecl_end_state)
+ return "entitydecl_end_state";
+
+ else if(state == paramentitydecl_name_state1)
+ return "paramentitydecl_name_state1";
+ else if(state == paramentitydecl_name_state2)
+ return "paramentitydecl_name_state2";
+ else if(state == paramentitydecl_content_state)
+ return "paramentitydecl_content_state";
+ else if(state == paramentitydecl_value_apos_state)
+ return "paramentitydecl_value_apos_state";
+ else if(state == paramentitydecl_value_quot_state)
+ return "paramentitydecl_value_quot_state";
+ else if(state == paramentitydecl_end_state)
+ return "paramentitydecl_end_state";
+
return "unknown";
}
#endif
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/tokenizer_states.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -279,12 +279,12 @@
FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env);
-FAXPP_Error elementdecl_initial_state6(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
@@ -314,7 +314,34 @@
FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_name_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_value_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_value_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env);
+
+FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env);
+
+
/*********************
*
* Tokenizer Helper Functions
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/xml_parser.c 2008-02-27 09:44:24 UTC (rev 38)
@@ -819,6 +819,7 @@
case DOCTYPE_END_TOKEN:
case SYSTEM_LITERAL_TOKEN:
case PUBID_LITERAL_TOKEN:
+ case NDATA_NAME_TOKEN:
case PE_REFERENCE_TOKEN:
case ELEMENTDECL_PREFIX_TOKEN:
case ELEMENTDECL_NAME_TOKEN:
@@ -828,6 +829,10 @@
case ATTLISTDECL_CONTENT_TOKEN:
case NOTATIONDECL_NAME_TOKEN:
case NOTATIONDECL_CONTENT_TOKEN:
+ case ENTITYDECL_NAME_TOKEN:
+ case ENTITYDECL_VALUE_TOKEN:
+ case ENTITYDECL_END_TOKEN:
+ case PARAMENTITYDECL_NAME_TOKEN:
// TBD - jpcs
break;
Modified: trunk/faxpp/src/xml_parser.h
===================================================================
--- trunk/faxpp/src/xml_parser.h 2008-02-25 12:05:13 UTC (rev 37)
+++ trunk/faxpp/src/xml_parser.h 2008-02-27 09:44:24 UTC (rev 38)
@@ -49,6 +49,23 @@
struct FAXPP_ElementInfo_s *prev;
} FAXPP_ElementInfo;
+typedef struct FAXPP_EntityValue_s { // One typed piece of text, chained to further pieces through next.
+ FAXPP_TokenType type; // NOTE(review): presumably the tokenizer token type this piece came from - confirm
+ FAXPP_Text value;
+
+ struct FAXPP_EntityValue_s *next; // following piece; NOTE(review): presumably NULL on the last one - confirm
+} FAXPP_EntityValue;
+
+typedef struct FAXPP_EntityInfo_s { // Per-entity record: a name, a value and a buffer; no code using it is visible in this revision's hunks.
+ FAXPP_Text name;
+
+ FAXPP_EntityValue value; // embedded by value; further pieces hang off value.next
+
+ FAXPP_Buffer buffer; // NOTE(review): presumably backing storage for the name/value text - confirm
+
+ struct FAXPP_EntityInfo_s *prev; // link to another FAXPP_EntityInfo; NOTE(review): list direction and ownership not shown here - confirm
+} FAXPP_EntityInfo;
+
typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv;
typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|