[Faxpp-devel] SF.net SVN: faxpp: [32] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
From: <jp...@us...> - 2007-09-25 11:50:52
Revision: 32 http://faxpp.svn.sourceforge.net/faxpp/?rev=32&view=rev Author: jpcs Date: 2007-09-25 04:50:47 -0700 (Tue, 25 Sep 2007) Log Message: ----------- Started to implement tokenizing of doctype declarations. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/TODO trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/comment.c trunk/faxpp/src/error.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/src/doctype.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/Makefile.am 2007-09-25 11:50:47 UTC (rev 32) @@ -31,7 +31,8 @@ src/xml_parser.c \ src/xml_parser.h \ src/xml_tokenizer.c \ -src/xml_tokenizer.h +src/xml_tokenizer.h \ +src/doctype.c tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/Makefile.in 2007-09-25 11:50:47 UTC (rev 32) @@ -63,7 +63,7 @@ am_libfaxpp_la_OBJECTS = buffer.lo cdata.lo char_classes.lo comment.lo \ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ - xml_tokenizer.lo + xml_tokenizer.lo doctype.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -251,7 +251,8 @@ src/xml_parser.c \ src/xml_parser.h \ src/xml_tokenizer.c \ -src/xml_tokenizer.h +src/xml_tokenizer.h \ +src/doctype.c 
tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la @@ -377,6 +378,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdata.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_classes.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/comment.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ @@ -518,6 +520,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xml_tokenizer.lo `test -f 'src/xml_tokenizer.c' || echo '$(srcdir)/'`src/xml_tokenizer.c +doctype.lo: src/doctype.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT doctype.lo -MD -MP -MF $(DEPDIR)/doctype.Tpo -c -o doctype.lo `test -f 'src/doctype.c' || echo '$(srcdir)/'`src/doctype.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/doctype.Tpo $(DEPDIR)/doctype.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/doctype.c' object='doctype.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o doctype.lo `test -f 'src/doctype.c' || echo '$(srcdir)/'`src/doctype.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/TODO 2007-09-25 11:50:47 UTC (rev 32) @@ -1,6 +1,7 @@ Small tasks ----------- +Don't accept Namespace 1.1 undefines in XML 1.0 mode Normalize line endings in element character content / PI values / comment values Accept XML 1.1 line endings as whitespace Handle "xml" namespace properly Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/include/faxpp/error.h 2007-09-25 11:50:47 UTC (rev 32) @@ -44,7 +44,10 @@ INVALID_ENCODING_VALUE, CDATA_END_IN_ELEMENT_CONTENT, UNDEFINED_ENTITY, - DOCTYPE_NOT_IMPLEMENTED, ///< TBD Remove this error - jpcs + INVALID_DOCTYPE_DECL, + INVALID_CHAR_IN_DOCTYPE_NAME, + INVALID_SYSTEM_ID, + EXPECTING_SYSTEM_LITERAL, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/include/faxpp/token.h 2007-09-25 11:50:47 UTC (rev 32) @@ -64,6 +64,12 @@ DEC_CHAR_REFERENCE_TOKEN, HEX_CHAR_REFERENCE_TOKEN, + DOCTYPE_PREFIX_TOKEN, + DOCTYPE_NAME_TOKEN, + DOCTYPE_END_TOKEN, + + SYSTEM_LITERAL_TOKEN, + END_OF_BUFFER_TOKEN = 99 } FAXPP_TokenType; Modified: trunk/faxpp/src/comment.c =================================================================== --- trunk/faxpp/src/comment.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/comment.c 2007-09-25 11:50:47 
UTC (rev 32) @@ -27,11 +27,6 @@ env->state = comment_start_state2; next_char(env); break; - case 'D': - env->state = comment_content_state; - token_start_position(env); - next_char(env); - return DOCTYPE_NOT_IMPLEMENTED; LINE_ENDINGS default: env->state = comment_content_state; Added: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c (rev 0) +++ trunk/faxpp/src/doctype.c 2007-09-25 11:50:47 UTC (rev 32) @@ -0,0 +1,390 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +FAXPP_Error +doctype_or_comment_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '-': + env->state = comment_start_state2; + break; + case 'D': + env->state = doctype_initial_state1; + env->seen_doctype = 1; + break; + LINE_ENDINGS + default: + env->state = comment_content_state; + token_start_position(env); + next_char(env); + return INVALID_START_OF_COMMENT; + } + next_char(env); + return NO_ERROR; +} + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(doctype_initial_state1, 'O', 0, doctype_initial_state2, INVALID_DOCTYPE_DECL) +SINGLE_CHAR_STATE(doctype_initial_state2, 'C', 0, doctype_initial_state3, INVALID_DOCTYPE_DECL) +SINGLE_CHAR_STATE(doctype_initial_state3, 'T', 0, doctype_initial_state4, INVALID_DOCTYPE_DECL) +SINGLE_CHAR_STATE(doctype_initial_state4, 'Y', 0, doctype_initial_state5, INVALID_DOCTYPE_DECL) +SINGLE_CHAR_STATE(doctype_initial_state5, 'P', 0, doctype_initial_state6, INVALID_DOCTYPE_DECL) +SINGLE_CHAR_STATE(doctype_initial_state6, 'E', doctype_name_state1, ws_plus_state, INVALID_DOCTYPE_DECL) + +FAXPP_Error +doctype_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = doctype_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_DOCTYPE_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +doctype_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + 
switch(env->current_char) { + WHITESPACE: + env->state = doctype_after_name_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '[': + env->state = doctype_internal_subset_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '>': + env->state = doctype_end_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + // no next char + return NO_ERROR; + case ':': + env->state = doctype_name_seen_colon_state1; + token_end_position(env); + report_token(DOCTYPE_PREFIX_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_DOCTYPE_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +doctype_name_seen_colon_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = doctype_name_seen_colon_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_DOCTYPE_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +doctype_name_seen_colon_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = doctype_after_name_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '[': + env->state = doctype_internal_subset_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '>': + env->state = doctype_end_state; + token_end_position(env); + report_token(DOCTYPE_NAME_TOKEN, env); + // no next char + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return 
INVALID_CHAR_IN_DOCTYPE_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +doctype_after_name_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + break; + case 'S': + env->stored_state = doctype_internal_subset_start_state; + env->state = system_id_initial_state1; + break; +/* case 'P': */ +/* env->stored_state = doctype_internal_subset_start_state; */ +/* env->state = public_id_initial_state1; */ +/* break; */ + case '[': + env->state = doctype_internal_subset_state; + break; + case '>': + base_state(env); + report_empty_token(DOCTYPE_END_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + next_char(env); + return INVALID_DOCTYPE_DECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + break; + case '[': + env->state = doctype_internal_subset_state; + break; + case '>': + base_state(env); + report_empty_token(DOCTYPE_END_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + next_char(env); + return INVALID_DOCTYPE_DECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +doctype_internal_subset_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + env->state = doctype_end_state; + next_char(env); + return NO_ERROR; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DOCTYPE_DECL; + } + return NO_ERROR; +} + +FAXPP_Error +doctype_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(DOCTYPE_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_DOCTYPE_DECL; + } + return NO_ERROR; +} + +SINGLE_CHAR_STATE(system_id_initial_state1, 'Y', 0, 
system_id_initial_state2, INVALID_SYSTEM_ID) +SINGLE_CHAR_STATE(system_id_initial_state2, 'S', 0, system_id_initial_state3, INVALID_SYSTEM_ID) +SINGLE_CHAR_STATE(system_id_initial_state3, 'T', 0, system_id_initial_state4, INVALID_SYSTEM_ID) +SINGLE_CHAR_STATE(system_id_initial_state4, 'E', 0, system_id_initial_state5, INVALID_SYSTEM_ID) +SINGLE_CHAR_STATE(system_id_initial_state5, 'M', 0, system_id_ws_state, INVALID_SYSTEM_ID) + +FAXPP_Error +system_id_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = system_literal_start_state; + next_char(env); + break; + default: + next_char(env); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +system_literal_start_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + return NO_ERROR; + case '"': + env->state = system_literal_quot_state; + break; + case '\'': + env->state = system_literal_apos_state; + break; + default: + next_char(env); + return EXPECTING_SYSTEM_LITERAL; + } + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +system_literal_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '\'': + retrieve_state(env); + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +system_literal_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '"': + retrieve_state(env); + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) 
& env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/error.c 2007-09-25 11:50:47 UTC (rev 32) @@ -83,8 +83,14 @@ return "CDATA_END_IN_ELEMENT_CONTENT"; case UNDEFINED_ENTITY: return "UNDEFINED_ENTITY"; - case DOCTYPE_NOT_IMPLEMENTED: - return "DOCTYPE_NOT_IMPLEMENTED"; + case INVALID_DOCTYPE_DECL: + return "INVALID_DOCTYPE_DECL"; + case INVALID_CHAR_IN_DOCTYPE_NAME: + return "INVALID_CHAR_IN_DOCTYPE_NAME"; + case INVALID_SYSTEM_ID: + return "INVALID_SYSTEM_ID"; + case EXPECTING_SYSTEM_LITERAL: + return "EXPECTING_SYSTEM_LITERAL"; case NO_ERROR: break; } Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/token.c 2007-09-25 11:50:47 UTC (rev 32) @@ -82,6 +82,17 @@ return "LT_ENTITY_REFERENCE_TOKEN"; case QUOT_ENTITY_REFERENCE_TOKEN: return "QUOT_ENTITY_REFERENCE_TOKEN"; + + case DOCTYPE_PREFIX_TOKEN: + return "DOCTYPE_PREFIX_TOKEN"; + case DOCTYPE_NAME_TOKEN: + return "DOCTYPE_NAME_TOKEN"; + case DOCTYPE_END_TOKEN: + return "DOCTYPE_END_TOKEN"; + + case SYSTEM_LITERAL_TOKEN: + return "SYSTEM_LITERAL_TOKEN"; + case NO_TOKEN: break; } Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/tokenizer_states.c 2007-09-25 11:50:47 UTC (rev 32) @@ -385,6 +385,56 @@ else if(state == xml_decl_question_state) return "xml_decl_question_state"; + else if(state == doctype_or_comment_state) + return "doctype_or_comment_state"; + else if(state == doctype_initial_state1) + return "doctype_initial_state1"; + else 
if(state == doctype_initial_state2) + return "doctype_initial_state2"; + else if(state == doctype_initial_state3) + return "doctype_initial_state3"; + else if(state == doctype_initial_state4) + return "doctype_initial_state4"; + else if(state == doctype_initial_state5) + return "doctype_initial_state5"; + else if(state == doctype_initial_state6) + return "doctype_initial_state6"; + else if(state == doctype_name_state1) + return "doctype_name_state1"; + else if(state == doctype_name_state2) + return "doctype_name_state2"; + else if(state == doctype_name_seen_colon_state1) + return "doctype_name_seen_colon_state1"; + else if(state == doctype_name_seen_colon_state2) + return "doctype_name_seen_colon_state2"; + else if(state == doctype_after_name_state) + return "doctype_after_name_state"; + else if(state == doctype_internal_subset_start_state) + return "doctype_internal_subset_start_state"; + else if(state == doctype_internal_subset_state) + return "doctype_internal_subset_state"; + else if(state == doctype_end_state) + return "doctype_end_state"; + + else if(state == system_id_initial_state1) + return "system_id_initial_state1"; + else if(state == system_id_initial_state2) + return "system_id_initial_state2"; + else if(state == system_id_initial_state3) + return "system_id_initial_state3"; + else if(state == system_id_initial_state4) + return "system_id_initial_state4"; + else if(state == system_id_initial_state5) + return "system_id_initial_state5"; + else if(state == system_id_ws_state) + return "system_id_ws_state"; + else if(state == system_literal_start_state) + return "system_literal_start_state"; + else if(state == system_literal_apos_state) + return "system_literal_apos_state"; + else if(state == system_literal_quot_state) + return "system_literal_quot_state"; + return "unknown"; } #endif @@ -533,7 +583,9 @@ token_start_position(env); break; case '!': - env->state = comment_start_state1; + if(env->seen_doctype) + env->state = comment_start_state1; + else 
env->state = doctype_or_comment_state; next_char(env); token_start_position(env); break; Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/tokenizer_states.h 2007-09-25 11:50:47 UTC (rev 32) @@ -240,6 +240,33 @@ FAXPP_Error xml_decl_question_state(FAXPP_TokenizerEnv *env); FAXPP_Error xml_decl_seen_question_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_or_comment_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_initial_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_name_seen_colon_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_after_name_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_internal_subset_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_end_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error 
system_literal_quot_state(FAXPP_TokenizerEnv *env); + + /********************* * * Tokenizer Helper Functions Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/xml_parser.c 2007-09-25 11:50:47 UTC (rev 32) @@ -810,6 +810,13 @@ p_next_token(err, env); switch(env->tenv.result_token.type) { + case DOCTYPE_PREFIX_TOKEN: + case DOCTYPE_NAME_TOKEN: + case DOCTYPE_END_TOKEN: + case SYSTEM_LITERAL_TOKEN: + // TBD - jpcs + break; + case XML_DECL_VERSION_TOKEN: case XML_DECL_ENCODING_TOKEN: case XML_DECL_STANDALONE_TOKEN: Modified: trunk/faxpp/src/xml_tokenizer.c =================================================================== --- trunk/faxpp/src/xml_tokenizer.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/xml_tokenizer.c 2007-09-25 11:50:47 UTC (rev 32) @@ -388,6 +388,7 @@ env->nesting_level = 0; env->do_encode = 1; + env->seen_doctype = 0; env->seen_doc_element = 0; env->buffer_done = done; Modified: trunk/faxpp/src/xml_tokenizer.h =================================================================== --- trunk/faxpp/src/xml_tokenizer.h 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/xml_tokenizer.h 2007-09-25 11:50:47 UTC (rev 32) @@ -43,6 +43,7 @@ unsigned int nesting_level; unsigned int do_encode:1; + unsigned int seen_doctype:1; unsigned int seen_doc_element:1; unsigned int buffer_done:1; unsigned int normalize_attrs:1; Modified: trunk/faxpp/src/xmldecl.c =================================================================== --- trunk/faxpp/src/xmldecl.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/src/xmldecl.c 2007-09-25 11:50:47 UTC (rev 32) @@ -29,7 +29,7 @@ token_start_position(env); break; case '!': - env->state = comment_start_state1; + env->state = doctype_or_comment_state; next_char(env); token_start_position(env); break; Modified: trunk/faxpp/tests/xmlconf_runner.c 
=================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2007-09-21 11:42:51 UTC (rev 31) +++ trunk/faxpp/tests/xmlconf_runner.c 2007-09-25 11:50:47 UTC (rev 32) @@ -319,12 +319,12 @@ calculateBase(base_buffer, &attr->value, file_buffer); result = run_test_case(file_buffer, &errLine); - if(result == DOCTYPE_NOT_IMPLEMENTED) { - printf("^"); - fflush(stdout); - ++test_skips; - break; - } +/* if(result == DOCTYPE_NOT_IMPLEMENTED) { */ +/* printf("^"); */ +/* fflush(stdout); */ +/* ++test_skips; */ +/* break; */ +/* } */ // @TYPE is not-wf, error, invalid, or valid attr = find_attribute(event, "TYPE"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |