[Faxpp-devel] SF.net SVN: faxpp: [38] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
From: <jp...@us...> - 2008-02-27 09:44:20
|
Revision: 38 http://faxpp.svn.sourceforge.net/faxpp/?rev=38&view=rev Author: jpcs Date: 2008-02-27 01:44:24 -0800 (Wed, 27 Feb 2008) Log Message: ----------- Added code to tokenize entity declarations. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/doctype.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Added Paths: ----------- trunk/faxpp/src/entitydecl.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38) @@ -35,7 +35,8 @@ src/doctype.c \ src/elementdecl.c \ src/attlistdecl.c \ -src/notationdecl.c +src/notationdecl.c \ +src/entitydecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38) @@ -64,7 +64,7 @@ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \ - notationdecl.lo + notationdecl.lo entitydecl.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -256,7 +256,8 @@ src/doctype.c \ src/elementdecl.c \ src/attlistdecl.c \ -src/notationdecl.c +src/notationdecl.c \ +src/entitydecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la @@ -386,6 +387,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@ @@ -555,6 +557,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c +entitydecl.lo: src/entitydecl.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entitydecl.lo -MD -MP -MF $(DEPDIR)/entitydecl.Tpo -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entitydecl.Tpo $(DEPDIR)/entitydecl.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/entitydecl.c' object='entitydecl.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38) @@ -55,6 +55,8 @@ INVALID_CHAR_IN_ELEMENTDECL_NAME, INVALID_CHAR_IN_ATTLISTDECL_NAME, INVALID_CHAR_IN_NOTATIONDECL_NAME, + INVALID_CHAR_IN_ENTITYDECL_NAME, + INVALID_ENTITYDECL, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/include/faxpp/token.h 2008-02-27 09:44:24 UTC (rev 38) @@ -71,6 +71,7 @@ SYSTEM_LITERAL_TOKEN, PUBID_LITERAL_TOKEN, + NDATA_NAME_TOKEN, ELEMENTDECL_PREFIX_TOKEN, ELEMENTDECL_NAME_TOKEN, @@ -83,6 +84,11 @@ NOTATIONDECL_NAME_TOKEN, NOTATIONDECL_CONTENT_TOKEN, + ENTITYDECL_NAME_TOKEN, + ENTITYDECL_VALUE_TOKEN, + ENTITYDECL_END_TOKEN, + PARAMENTITYDECL_NAME_TOKEN, + END_OF_BUFFER_TOKEN = 99 } FAXPP_TokenType; Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/attlistdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -163,6 +163,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(ATTLISTDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/doctype.c 2008-02-27 09:44:24 UTC (rev 38) @@ -398,6 +398,9 @@ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': case '%': @@ -439,6 +442,9 @@ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': case '%': case '\'': @@ -544,7 +550,7 @@ env->state = comment_start_state2; break; case 'E': - env->state = elementdecl_initial_state1; + env->state = elementdecl_or_entitydecl_state; break; case 'A': env->state = attlistdecl_initial_state1; Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/elementdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -17,6 +17,28 @@ #include "tokenizer_states.h" #include "char_classes.h" +FAXPP_Error +elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'L': + env->state = elementdecl_initial_state1; + next_char(env); + break; + case 'N': + env->state = entitydecl_initial_state1; + next_char(env); + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DTD_DECL; + } + return NO_ERROR; +} + #define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ FAXPP_Error \ name(FAXPP_TokenizerEnv *env) \ @@ -37,12 +59,11 @@ return NO_ERROR; \ } -SINGLE_CHAR_STATE(elementdecl_initial_state1, 'L', 0, elementdecl_initial_state2, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state2, 'E', 0, elementdecl_initial_state3, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state3, 'M', 0, elementdecl_initial_state4, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state4, 'E', 0, elementdecl_initial_state5, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state5, 'N', 0, elementdecl_initial_state6, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state6, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env) @@ -149,6 +170,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(ELEMENTDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Added: trunk/faxpp/src/entitydecl.c =================================================================== --- trunk/faxpp/src/entitydecl.c (rev 0) +++ trunk/faxpp/src/entitydecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -0,0 +1,574 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(entitydecl_initial_state1, 'T', 0, entitydecl_initial_state2, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state2, 'I', 0, entitydecl_initial_state3, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state3, 'T', 0, entitydecl_initial_state4, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state4, 'Y', entitydecl_param_or_general_state, ws_plus_state, INVALID_DTD_DECL) + +FAXPP_Error +entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '%': + env->stored_state = paramentitydecl_name_state1; + env->state = ws_plus_state; + next_char(env); + break; + LINE_ENDINGS + default: + env->state = entitydecl_name_state; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + break; + } + + return NO_ERROR; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +FAXPP_Error +entitydecl_name_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = entitydecl_content_state; + env->state = ws_state; + token_end_position(env); + report_token(ENTITYDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '"': + env->state = entitydecl_value_quot_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case '\'': + env->state = entitydecl_value_apos_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case 'S': + env->stored_state = entitydecl_ws_state; + env->state = system_id_initial_state1; + break; + case 'P': + env->stored_state = entitydecl_ws_state; + env->state = public_id_initial_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ENTITYDECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +entitydecl_value_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_value_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = entitydecl_ndata_or_end_state;; + next_char(env); + break; + case '>': + env->state = entitydecl_end_state; + break; + default: + env->state = entitydecl_ndata_or_end_state; + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + env->state = entitydecl_end_state; + break; + case 'N': + env->state = entitydecl_ndata_state1; + next_char(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + +SINGLE_CHAR_STATE(entitydecl_ndata_state1, 'D', 0, entitydecl_ndata_state2, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state2, 'A', 0, entitydecl_ndata_state3, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state3, 'T', 0, entitydecl_ndata_state4, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state4, 'A', entitydecl_ndata_name_state1, ws_plus_state, INVALID_ENTITYDECL) + +FAXPP_Error +entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = entitydecl_ndata_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_ENTITYDECL; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = entitydecl_end_state; + token_end_position(env); + report_token(NDATA_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '>': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(NDATA_NAME_TOKEN, env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ENTITYDECL; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(ENTITYDECL_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +FAXPP_Error +paramentitydecl_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = paramentitydecl_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = paramentitydecl_content_state; + env->state = ws_state; + token_end_position(env); + report_token(PARAMENTITYDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '"': + env->state = paramentitydecl_value_quot_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case '\'': + env->state = paramentitydecl_value_apos_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case 'S': + env->stored_state = paramentitydecl_end_state; + env->state = system_id_initial_state1; + break; + case 'P': + env->stored_state = paramentitydecl_end_state; + env->state = public_id_initial_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ENTITYDECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = paramentitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = paramentitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(ENTITYDECL_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/error.c 2008-02-27 09:44:24 UTC (rev 38) @@ -105,6 +105,10 @@ return "INVALID_CHAR_IN_ATTLISTDECL_NAME"; case INVALID_CHAR_IN_NOTATIONDECL_NAME: return "INVALID_CHAR_IN_NOTATIONDECL_NAME"; + case INVALID_CHAR_IN_ENTITYDECL_NAME: + return "INVALID_CHAR_IN_ENTITYDECL_NAME"; + case INVALID_ENTITYDECL: + return "INVALID_ENTITYDECL"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/notationdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -98,6 +98,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(NOTATIONDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/token.c 2008-02-27 09:44:24 UTC (rev 38) @@ -96,6 +96,8 @@ return "SYSTEM_LITERAL_TOKEN"; case PUBID_LITERAL_TOKEN: return "PUBID_LITERAL_TOKEN"; + case NDATA_NAME_TOKEN: + return "NDATA_NAME_TOKEN"; case ELEMENTDECL_PREFIX_TOKEN: return "ELEMENTDECL_PREFIX_TOKEN"; @@ -116,6 +118,15 @@ case NOTATIONDECL_CONTENT_TOKEN: return "NOTATIONDECL_CONTENT_TOKEN"; + case ENTITYDECL_NAME_TOKEN: + return "ENTITYDECL_NAME_TOKEN"; + case ENTITYDECL_VALUE_TOKEN: + return "ENTITYDECL_VALUE_TOKEN"; + case ENTITYDECL_END_TOKEN: + return "ENTITYDECL_END_TOKEN"; + case PARAMENTITYDECL_NAME_TOKEN: + return "PARAMENTITYDECL_NAME_TOKEN"; + case NO_TOKEN: break; } Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/tokenizer_states.c 2008-02-27 09:44:24 UTC (rev 38) @@ -460,10 +460,10 @@ else if(state == pubid_literal_quot_state) return "pubid_literal_quot_state"; + else if(state == elementdecl_or_entitydecl_state) + return "elementdecl_or_entitydecl_state"; else if(state == elementdecl_initial_state1) return "elementdecl_initial_state1"; - else if(state == elementdecl_initial_state1) - return "elementdecl_initial_state1"; else if(state == elementdecl_initial_state2) return "elementdecl_initial_state2"; else if(state == elementdecl_initial_state3) @@ -472,8 +472,6 @@ return "elementdecl_initial_state4"; else if(state == elementdecl_initial_state5) return "elementdecl_initial_state5"; - else if(state == elementdecl_initial_state6) - return "elementdecl_initial_state6"; else if(state == elementdecl_name_state1) return "elementdecl_name_state1"; else if(state == elementdecl_name_state2) @@ -533,6 +531,59 @@ else if(state == notationdecl_content_state) return "notationdecl_content_state"; + else if(state == entitydecl_initial_state1) + return "entitydecl_initial_state1"; + else if(state == entitydecl_initial_state1) + return "entitydecl_initial_state1"; + else if(state == entitydecl_initial_state2) + return "entitydecl_initial_state2"; + else if(state == entitydecl_initial_state3) + return "entitydecl_initial_state3"; + else if(state == entitydecl_initial_state4) + return "entitydecl_initial_state4"; + else if(state == entitydecl_param_or_general_state) + return "entitydecl_param_or_general_state"; + + else if(state == entitydecl_name_state) + return "entitydecl_name_state"; + else if(state == entitydecl_content_state) + return "entitydecl_content_state"; + else if(state == entitydecl_value_apos_state) + return "entitydecl_value_apos_state"; + else if(state == entitydecl_value_quot_state) + return "entitydecl_value_quot_state"; + else if(state == entitydecl_ws_state) + return "entitydecl_ws_state"; + else if(state == entitydecl_ndata_or_end_state) + return "entitydecl_ndata_or_end_state"; + else if(state == entitydecl_ndata_state1) + return "entitydecl_ndata_state1"; + else if(state == entitydecl_ndata_state2) + return "entitydecl_ndata_state2"; + else if(state == entitydecl_ndata_state3) + return "entitydecl_ndata_state3"; + else if(state == entitydecl_ndata_state4) + return "entitydecl_ndata_state4"; + else if(state == entitydecl_ndata_name_state1) + return "entitydecl_ndata_name_state1"; + else if(state == entitydecl_ndata_name_state2) + return "entitydecl_ndata_name_state2"; + else if(state == entitydecl_end_state) + return "entitydecl_end_state"; + + else if(state == paramentitydecl_name_state1) + return "paramentitydecl_name_state1"; + else if(state == paramentitydecl_name_state2) + return "paramentitydecl_name_state2"; + else if(state == paramentitydecl_content_state) + return "paramentitydecl_content_state"; + else if(state == paramentitydecl_value_apos_state) + return "paramentitydecl_value_apos_state"; + else if(state == paramentitydecl_value_quot_state) + return "paramentitydecl_value_quot_state"; + else if(state == paramentitydecl_end_state) + return "paramentitydecl_end_state"; + return "unknown"; } #endif Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/tokenizer_states.h 2008-02-27 09:44:24 UTC (rev 38) @@ -279,12 +279,12 @@ FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error elementdecl_initial_state6(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); @@ -314,7 +314,34 @@ FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_name_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_value_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_value_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); + + /********************* * * Tokenizer Helper Functions Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/xml_parser.c 2008-02-27 09:44:24 UTC (rev 38) @@ -819,6 +819,7 @@ case DOCTYPE_END_TOKEN: case SYSTEM_LITERAL_TOKEN: case PUBID_LITERAL_TOKEN: + case NDATA_NAME_TOKEN: case PE_REFERENCE_TOKEN: case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: @@ -828,6 +829,10 @@ case ATTLISTDECL_CONTENT_TOKEN: case NOTATIONDECL_NAME_TOKEN: case NOTATIONDECL_CONTENT_TOKEN: + case ENTITYDECL_NAME_TOKEN: + case ENTITYDECL_VALUE_TOKEN: + case ENTITYDECL_END_TOKEN: + case PARAMENTITYDECL_NAME_TOKEN: // TBD - jpcs break; Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/xml_parser.h 2008-02-27 09:44:24 UTC (rev 38) @@ -49,6 +49,23 @@ struct FAXPP_ElementInfo_s *prev; } FAXPP_ElementInfo; +typedef struct FAXPP_EntityValue_s { + FAXPP_TokenType type; + FAXPP_Text value; + + struct FAXPP_EntityValue_s *next; +} FAXPP_EntityValue; + +typedef struct FAXPP_EntityInfo_s { + FAXPP_Text name; + + FAXPP_EntityValue value; + + FAXPP_Buffer buffer; + + struct FAXPP_EntityInfo_s *prev; +} FAXPP_EntityInfo; + typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv; typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |