[Faxpp-devel] SF.net SVN: faxpp: [43] trunk/faxpp
Status: Beta
Brought to you by: jpcs
From: <jp...@us...> - 2008-03-14 15:24:58
Revision: 43 http://faxpp.svn.sourceforge.net/faxpp/?rev=43&view=rev Author: jpcs Date: 2008-03-14 08:24:54 -0700 (Fri, 14 Mar 2008) Log Message: ----------- Added the ability to parse conditional sections in external DTDs. Added tokens for attlist declarations. Fixed a few memory bugs shown by valgrind. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/doctype.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/src/conditional.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/Makefile.am 2008-03-14 15:24:54 UTC (rev 43) @@ -36,7 +36,8 @@ src/elementdecl.c \ src/attlistdecl.c \ src/notationdecl.c \ -src/entitydecl.c +src/entitydecl.c \ +src/conditional.c tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/Makefile.in 2008-03-14 15:24:54 UTC (rev 43) @@ -64,7 +64,7 @@ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \ - notationdecl.lo entitydecl.lo + notationdecl.lo entitydecl.lo conditional.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ 
-259,7 +259,8 @@ src/elementdecl.c \ src/attlistdecl.c \ src/notationdecl.c \ -src/entitydecl.c +src/entitydecl.c \ +src/conditional.c tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -394,6 +395,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdata.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_classes.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/comment.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conditional.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ @@ -576,6 +578,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c +conditional.lo: src/conditional.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT conditional.lo -MD -MP -MF $(DEPDIR)/conditional.Tpo -c -o conditional.lo `test -f 'src/conditional.c' || echo '$(srcdir)/'`src/conditional.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/conditional.Tpo $(DEPDIR)/conditional.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/conditional.c' object='conditional.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) 
-c -o conditional.lo `test -f 'src/conditional.c' || echo '$(srcdir)/'`src/conditional.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/include/faxpp/error.h 2008-03-14 15:24:54 UTC (rev 43) @@ -68,6 +68,7 @@ INVALID_ATTRIBUTE_TYPE, INVALID_DEFAULTDECL, INVALID_ELEMENTDECL_CONTENT, + INVALID_CONDITIONAL_SECTION, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/include/faxpp/token.h 2008-03-14 15:24:54 UTC (rev 43) @@ -91,8 +91,19 @@ ATTLISTDECL_NAME_TOKEN, ATTLISTDECL_ATTDEF_PREFIX_TOKEN, ATTLISTDECL_ATTDEF_NAME_TOKEN, + ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, + ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN, + ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, + ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, + ATTLISTDECL_ATTTYPE_ID_TOKEN, + ATTLISTDECL_ATTTYPE_IDREF_TOKEN, + ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, + ATTLISTDECL_ATTTYPE_CDATA_TOKEN, ATTLISTDECL_NOTATION_NAME_TOKEN, ATTLISTDECL_ENUMERATION_NAME_TOKEN, + ATTLISTDECL_DEFAULT_IMPLIED_TOKEN, + ATTLISTDECL_DEFAULT_REQUIRED_TOKEN, + ATTLISTDECL_DEFAULT_FIXED_TOKEN, ATTLISTDECL_END_TOKEN, NOTATIONDECL_NAME_TOKEN, Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-03-13 21:56:47 UTC (rev 42) +++ 
trunk/faxpp/src/attlistdecl.c 2008-03-14 15:24:54 UTC (rev 43) @@ -17,7 +17,7 @@ #include "tokenizer_states.h" #include "char_classes.h" -#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +#define SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, return_token) \ FAXPP_Error \ name(FAXPP_TokenizerEnv *env) \ { \ @@ -27,6 +27,7 @@ case (ch): \ if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ env->state = (next_state); \ + if((return_token) != NO_TOKEN) { report_empty_token((return_token), env); } \ next_char(env); \ break; \ LINE_ENDINGS \ @@ -37,6 +38,8 @@ return NO_ERROR; \ } +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, NO_TOKEN) + SINGLE_CHAR_STATE(attlistdecl_initial_state1, 'T', 0, attlistdecl_initial_state2, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state2, 'T', 0, attlistdecl_initial_state3, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state3, 'L', 0, attlistdecl_initial_state4, INVALID_DTD_DECL) @@ -316,9 +319,9 @@ switch(env->current_char) { case 'Y': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, env); break; case 'I': env->state = attlistdecl_atttype_entities_state1; @@ -333,10 +336,8 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN) FAXPP_Error attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) @@ -373,14 +374,14 @@ 
switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, env); break; case 'S': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_plus_state; + report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, env); break; default: next_char(env); @@ -391,8 +392,6 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state1, 'T', 0, attlistdecl_atttype_notation_state2, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state2, 'A', 0, attlistdecl_atttype_notation_state3, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE) @@ -504,9 +503,9 @@ switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_ID_TOKEN, env); break; case 'R': env->state = attlistdecl_atttype_idref_state1; @@ -530,14 +529,14 @@ switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_IDREF_TOKEN, env); break; case 'S': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_plus_state; + report_empty_token(ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, env); break; default: next_char(env); @@ -548,12 +547,10 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, 
attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN) FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) @@ -688,14 +685,12 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_default_implied_state1, 'M', 0, attlistdecl_default_implied_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state2, 'P', 0, attlistdecl_default_implied_state3, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL) @@ -703,12 +698,12 @@ SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL) 
-SINGLE_CHAR_STATE(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state3, 'E', 0, attlistdecl_default_fixed_state4, INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_FIXED_TOKEN) FAXPP_Error attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env) Added: trunk/faxpp/src/conditional.c =================================================================== --- trunk/faxpp/src/conditional.c (rev 0) +++ trunk/faxpp/src/conditional.c 2008-03-14 15:24:54 UTC (rev 43) @@ -0,0 +1,197 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(conditional_state1, 'I', 0, conditional_state2, INVALID_CONDITIONAL_SECTION) + +FAXPP_Error +conditional_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'N': + env->state = include_state1; + env->nesting_level += 1; + break; + case 'G': + env->state = ignore_state1; + env->nesting_level += 1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_CONDITIONAL_SECTION; + } + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(ignore_state1, 'N', 0, ignore_state2, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state2, 'O', 0, ignore_state3, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state3, 'R', 0, ignore_state4, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state4, 'E', ignore_state5, ws_state, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state5, '[', 0, ignore_content_state, INVALID_CONDITIONAL_SECTION) + +FAXPP_Error +ignore_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '<': + env->state = ignore_content_seen_lt_state; + break; + case ']': + env->state = ignore_content_seen_rsquare_state1; + break; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_lt_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + 
switch(env->current_char) { + case '!': + env->state = ignore_content_seen_bang_state; + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_bang_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + env->state = ignore_content_state; + + switch(env->current_char) { + case '[': + env->nesting_level += 1; + break; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_rsquare_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + env->state = ignore_content_seen_rsquare_state2; + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_rsquare_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + break; + case '>': + env->nesting_level -= 1; + if(env->nesting_level == 0) { + base_state(env); + } + else { + env->state = ignore_content_state; + } + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(include_state1, 'C', 0, include_state2, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state2, 'L', 0, include_state3, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state3, 'U', 0, include_state4, 
INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state4, 'D', 0, include_state5, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state5, 'E', include_state6, ws_state, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state6, '[', 0, external_subset_state, INVALID_CONDITIONAL_SECTION) Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/doctype.c 2008-03-14 15:24:54 UTC (rev 43) @@ -557,7 +557,7 @@ break; default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -583,7 +583,7 @@ break; default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -681,9 +681,16 @@ case '<': env->state = external_subset_markup_state; break; + case ']': + // Check if we're in an include section + if(env->nesting_level != 0) { + env->state = external_subset_seen_rsquare_state1; + break; + } + // Fall through default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -691,6 +698,48 @@ } FAXPP_Error +external_subset_seen_rsquare_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + env->state = external_subset_seen_rsquare_state2; + break; + default: + base_state(env); + // No next_char + return INVALID_DTD_DECL; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +external_subset_seen_rsquare_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + next_char(env); + return INVALID_DTD_DECL; + case '>': + env->nesting_level -= 1; + base_state(env); + break; + default: + base_state(env); + // No next_char + return INVALID_DTD_DECL; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env) { read_char(env); @@ -722,9 +771,10 @@ case '-': env->state = 
comment_start_state2; break; -/* // TBD conditional sections - jpcs */ -/* case '[': */ -/* break; */ + case '[': + env->stored_state = conditional_state1; + env->state = ws_state; + break; case 'E': env->state = elementdecl_or_entitydecl_state; break; Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/elementdecl.c 2008-03-14 15:24:54 UTC (rev 43) @@ -178,7 +178,7 @@ env->state = elementdecl_any_state1; break; case '(': - env->nesting_level += 1; + env->elemdecl_content_level += 1; env->stored_state = elementdecl_mixed_or_children_state; env->state = ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); @@ -226,7 +226,7 @@ switch(env->current_char) { case '(': - env->nesting_level += 1; + env->elemdecl_content_level += 1; env->stored_state = elementdecl_cp_name_state1; env->state = ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); @@ -359,7 +359,7 @@ { read_char(env); - if(env->nesting_level == 0) + if(env->elemdecl_content_level == 0) env->stored_state = elementdecl_end_state; else env->stored_state = elementdecl_cp_separator_or_end_state; @@ -401,7 +401,7 @@ report_empty_token(ELEMENTDECL_COMMA_TOKEN, env); break; case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_cp_cardinality_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; @@ -428,7 +428,7 @@ switch(env->current_char) { case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_pcdata_optional_star_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; @@ -471,7 +471,7 @@ switch(env->current_char) { case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_pcdata_star_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; Modified: trunk/faxpp/src/error.c 
=================================================================== --- trunk/faxpp/src/error.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/error.c 2008-03-14 15:24:54 UTC (rev 43) @@ -131,6 +131,8 @@ return "INVALID_DEFAULTDECL"; case INVALID_ELEMENTDECL_CONTENT: return "INVALID_ELEMENTDECL_CONTENT"; + case INVALID_CONDITIONAL_SECTION: + return "INVALID_CONDITIONAL_SECTION"; case NO_ERROR: break; } Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/token.c 2008-03-14 15:24:54 UTC (rev 43) @@ -134,10 +134,32 @@ return "ATTLISTDECL_ATTDEF_PREFIX_TOKEN"; case ATTLISTDECL_ATTDEF_NAME_TOKEN: return "ATTLISTDECL_ATTDEF_NAME_TOKEN"; + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + return "ATTLISTDECL_ATTTYPE_ENTITY_TOKEN"; + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + return "ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN"; + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + return "ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN"; + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + return "ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN"; + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + return "ATTLISTDECL_ATTTYPE_ID_TOKEN"; + case ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + return "ATTLISTDECL_ATTTYPE_IDREF_TOKEN"; + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + return "ATTLISTDECL_ATTTYPE_IDREFS_TOKEN"; + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: + return "ATTLISTDECL_ATTTYPE_CDATA_TOKEN"; case ATTLISTDECL_NOTATION_NAME_TOKEN: return "ATTLISTDECL_NOTATION_NAME_TOKEN"; case ATTLISTDECL_ENUMERATION_NAME_TOKEN: return "ATTLISTDECL_ENUMERATION_NAME_TOKEN"; + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + return "ATTLISTDECL_DEFAULT_IMPLIED_TOKEN"; + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + return "ATTLISTDECL_DEFAULT_REQUIRED_TOKEN"; + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: + return "ATTLISTDECL_DEFAULT_FIXED_TOKEN"; case ATTLISTDECL_END_TOKEN: return "ATTLISTDECL_END_TOKEN"; Modified: trunk/faxpp/src/tokenizer_states.c 
=================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-14 15:24:54 UTC (rev 43) @@ -435,6 +435,10 @@ else if(state == external_subset_state) return "external_subset_state"; + else if(state == external_subset_seen_rsquare_state1) + return "external_subset_seen_rsquare_state1"; + else if(state == external_subset_seen_rsquare_state2) + return "external_subset_seen_rsquare_state2"; else if(state == external_subset_markup_state) return "external_subset_markup_state"; else if(state == external_subset_decl_state) @@ -789,6 +793,43 @@ else if(state == paramentitydecl_end_state) return "paramentitydecl_end_state"; + else if(state == conditional_state1) + return "conditional_state1"; + else if(state == conditional_state2) + return "conditional_state2"; + else if(state == ignore_state1) + return "ignore_state1"; + else if(state == ignore_state2) + return "ignore_state2"; + else if(state == ignore_state3) + return "ignore_state3"; + else if(state == ignore_state4) + return "ignore_state4"; + else if(state == ignore_state5) + return "ignore_state5"; + else if(state == ignore_content_state) + return "ignore_content_state"; + else if(state == ignore_content_seen_lt_state) + return "ignore_content_seen_lt_state"; + else if(state == ignore_content_seen_bang_state) + return "ignore_content_seen_bang_state"; + else if(state == ignore_content_seen_rsquare_state1) + return "ignore_content_seen_rsquare_state1"; + else if(state == ignore_content_seen_rsquare_state2) + return "ignore_content_seen_rsquare_state2"; + else if(state == include_state1) + return "include_state1"; + else if(state == include_state2) + return "include_state2"; + else if(state == include_state3) + return "include_state3"; + else if(state == include_state4) + return "include_state4"; + else if(state == include_state5) + return "include_state5"; + else if(state == include_state6) + 
return "include_state6"; + return "unknown"; } #endif Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-14 15:24:54 UTC (rev 43) @@ -266,6 +266,8 @@ FAXPP_Error internal_subset_decl_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_state(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_seen_rsquare_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_seen_rsquare_state2(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env); @@ -446,7 +448,26 @@ FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_lt_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_bang_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_rsquare_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_rsquare_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state6(FAXPP_TokenizerEnv *env); + /********************* * * Tokenizer Helper 
Functions @@ -527,20 +548,18 @@ #define base_state(env) \ { \ - if((env)->nesting_level != 0) \ + if((env)->external_subset || (env)->external_dtd_entity) \ + (env)->state = external_subset_state; \ + else if((env)->nesting_level != 0) \ (env)->state = (env)->element_content_state; \ - else if((env)->element_entity) \ - (env)->state = parsed_entity_state; \ else if((env)->internal_dtd_entity) \ (env)->state = internal_subset_state_en; \ - else if((env)->external_parsed_entity) \ + else if((env)->element_entity || (env)->external_parsed_entity) \ (env)->state = parsed_entity_state; \ else if((env)->seen_doc_element) \ (env)->state = final_state; \ else if((env)->internal_subset) \ (env)->state = internal_subset_state; \ - else if((env)->external_subset) \ - (env)->state = external_subset_state; \ else (env)->state = initial_misc_state; \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_parser.c 2008-03-14 15:24:54 UTC (rev 43) @@ -111,9 +111,16 @@ FAXPP_AttrValue *at; FAXPP_ElementInfo *el; FAXPP_NamespaceInfo *ns; + FAXPP_ContentSpec *cs; if(env->attrs) free(env->attrs); + while(env->current_elementdecl) { + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + } + while(env->av_dealloc) { at = env->av_dealloc; env->av_dealloc = at->dealloc_next; @@ -248,10 +255,23 @@ { FAXPP_ElementInfo *el; FAXPP_NamespaceInfo *ns; + FAXPP_ContentSpec *cs; env->tenv->buffered_token = 0; env->tenv->user_provided_decode = 0; + // Free the elementdecl stack + while(env->current_elementdecl) { + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + } + + env->current_attr = 0; + env->current_entity = 0; + env->current_attlist = 0; + env->current_notation = 0; + // Put the element info objects back in the pool while(env->element_info_stack) { el = env->element_info_stack; @@ 
-408,13 +428,15 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// -static void p_text_change_buffer(FAXPP_Buffer *buffer, void *newBuffer, FAXPP_Text *text) +static void p_change_buffer(FAXPP_Buffer *buffer, void *newBuffer, void **text) { - if(text->ptr >= buffer->buffer && text->ptr < (buffer->buffer + buffer->length)) { - text->ptr += newBuffer - buffer->buffer; + if(*text >= buffer->buffer && *text < (buffer->buffer + buffer->length)) { + *text += newBuffer - buffer->buffer; } } +#define p_text_change_buffer(buffer, newBuffer, text) p_change_buffer((buffer), (newBuffer), &(text)->ptr) + static void p_change_event_buffer(void *userData, FAXPP_Buffer *buffer, void *newBuffer) { unsigned int i; @@ -487,13 +509,19 @@ while(tokenizer) { p_text_change_buffer(buffer, newBuffer, &tokenizer->base_uri); + // The tokenizer buffer can also point into the entity_buffer, so change that too + p_change_buffer(buffer, newBuffer, &tokenizer->buffer); + p_change_buffer(buffer, newBuffer, &tokenizer->buffer_end); + p_change_buffer(buffer, newBuffer, &tokenizer->position); + tokenizer = tokenizer->prev; } } #define p_move_text_to_buffer(env, text, buf) \ { \ - if((text)->ptr >= (env)->tenv->buffer && (text)->ptr < (env)->tenv->buffer_end) { \ + if((text)->ptr >= (env)->tenv->buffer && (text)->ptr < (env)->tenv->buffer_end && \ + ((text)->ptr < (buf)->buffer || (text)->ptr >= ((buf)->buffer + (buf)->length))) { \ void *newPtr = (buf)->cursor; \ FAXPP_Error err = FAXPP_buffer_append((buf), (text)->ptr, (text)->len); \ if((env)->tenv->null_terminate && err == 0) \ @@ -915,10 +943,10 @@ unsigned int readlen; FAXPP_Error err; + err = FAXPP_release_buffer(env, 0); + if(err != 0) return err; + if(env->tenv->read && !env->tenv->buffer_done) { - err = FAXPP_release_buffer(env, 0); - if(err != 0) return err; - if(env->tenv->position < env->tenv->buffer_end) { // We're half way through a charcter, so we need to copy // the partial char to 
the begining of the buffer to keep @@ -942,7 +970,7 @@ if(env->tenv->attr_entity) { // TBD default attr values - jpcs if(!env->tenv->prev->internal_subset && !env->tenv->prev->external_subset && - !env->tenv->prev->internal_dtd_entity) { + !env->tenv->prev->internal_dtd_entity && !env->tenv->prev->external_dtd_entity) { err = p_set_attr_value_name_from_entity(env->current_attr, env, ENTITY_REFERENCE_END_EVENT, env->tenv->entity); if(err) return err; } @@ -1266,7 +1294,14 @@ } if(ent->external) { - return p_parse_external_entity(env, ent, state - INTERNAL_DIFF); + switch(state) { + case ELEMENT_CONTENT_ENTITY: state = EXTERNAL_PARSED_ENTITY; break; + case INTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + case EXTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + default: break; + } + + return p_parse_external_entity(env, ent, state); } return p_parse_internal_entity(env, ent, state, &ent); @@ -1314,6 +1349,7 @@ { FAXPP_EntityInfo *ent; FAXPP_EntityValue *entv; + FAXPP_ContentSpec *cs; FAXPP_Text bkup_system, bkup_public; Char32 ch; FAXPP_Error err = 0; @@ -1467,6 +1503,7 @@ entv->value.len = env->entity_buffer.cursor - entv->value.ptr; } else if(env->current_attlist) { + // General entities in ATTLIST values should be looked up straight away ent = p_find_entity_info(&env->tenv->result_token.value, env->general_entities); if(ent == 0) { err = UNDEFINED_ENTITY; @@ -1518,7 +1555,7 @@ p_set_text_from_text(&bkup_system, &env->event.system_id); p_set_text_from_text(&bkup_public, &env->event.public_id); - err = p_parse_entity(env, ent, INTERNAL_DTD_ENTITY); + err = p_parse_entity(env, ent, env->tenv->internal_subset ? 
INTERNAL_DTD_ENTITY : EXTERNAL_DTD_ENTITY); p_set_text_from_text(&env->event.system_id, &bkup_system); p_set_text_from_text(&env->event.public_id, &bkup_public); @@ -1527,6 +1564,36 @@ } break; + case ELEMENTDECL_LPAR_TOKEN: + cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec)); + memset(cs, 0, sizeof(FAXPP_ContentSpec)); + cs->parent = env->current_elementdecl; + env->current_elementdecl = cs; + break; + case ELEMENTDECL_RPAR_TOKEN: + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + break; + case ELEMENTDECL_BAR_TOKEN: + if(env->current_elementdecl->type == CONTENTSPEC_NONE) { + env->current_elementdecl->type = CONTENTSPEC_CHOICE; + } + else if(env->current_elementdecl->type != CONTENTSPEC_CHOICE) { + err = INVALID_ELEMENTDECL_CONTENT; + goto error; + } + break; + case ELEMENTDECL_COMMA_TOKEN: + if(env->current_elementdecl->type == CONTENTSPEC_NONE) { + env->current_elementdecl->type = CONTENTSPEC_SEQUENCE; + } + else if(env->current_elementdecl->type != CONTENTSPEC_SEQUENCE) { + err = INVALID_ELEMENTDECL_CONTENT; + goto error; + } + break; + case ATTLISTDECL_PREFIX_TOKEN: case ATTLISTDECL_NAME_TOKEN: env->current_attlist = 1; @@ -1560,24 +1627,32 @@ case DOCTYPE_NAME_TOKEN: p_copy_text_from_token(&env->event.name, env, /*useTokenBuffer*/0); break; + case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: case ELEMENTDECL_EMPTY_TOKEN: case ELEMENTDECL_ANY_TOKEN: case ELEMENTDECL_PCDATA_TOKEN: - case ELEMENTDECL_LPAR_TOKEN: - case ELEMENTDECL_RPAR_TOKEN: case ELEMENTDECL_QUESTION_TOKEN: case ELEMENTDECL_STAR_TOKEN: case ELEMENTDECL_PLUS_TOKEN: - case ELEMENTDECL_BAR_TOKEN: - case ELEMENTDECL_COMMA_TOKEN: case ELEMENTDECL_END_TOKEN: case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: case ATTLISTDECL_ATTDEF_NAME_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + case 
ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: case ATTLISTDECL_NOTATION_NAME_TOKEN: case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: // Ignore for now break; case COMMENT_TOKEN: @@ -1641,8 +1716,19 @@ case ATTLISTDECL_NAME_TOKEN: case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: case ATTLISTDECL_ATTDEF_NAME_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: case ATTLISTDECL_NOTATION_NAME_TOKEN: case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: case ATTLISTDECL_END_TOKEN: case NOTATIONDECL_NAME_TOKEN: case NOTATIONDECL_END_TOKEN: Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_parser.h 2008-03-14 15:24:54 UTC (rev 43) @@ -75,6 +75,19 @@ FAXPP_EntityInfo *next; }; +typedef enum { + CONTENTSPEC_NONE = 0, + CONTENTSPEC_SEQUENCE, + CONTENTSPEC_CHOICE +} FAXPP_ContentSpecType; + +typedef struct FAXPP_ContentSpec_s FAXPP_ContentSpec; + +struct FAXPP_ContentSpec_s { + FAXPP_ContentSpecType type; + FAXPP_ContentSpec *parent; +}; + typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv; typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env); @@ -96,8 +109,10 @@ unsigned int max_attr_count; FAXPP_Attribute *attrs; + FAXPP_Attribute *current_attr; FAXPP_EntityInfo *current_entity; + FAXPP_ContentSpec *current_elementdecl; unsigned int current_attlist:1; unsigned int 
current_notation:1; Modified: trunk/faxpp/src/xml_tokenizer.c =================================================================== --- trunk/faxpp/src/xml_tokenizer.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_tokenizer.c 2008-03-14 15:24:54 UTC (rev 43) @@ -32,6 +32,8 @@ #define INITIAL_TOKEN_BUFFER_SIZE 64 +#define SNIFF_NEXT_CHAR(buf) (((buf) < (unsigned char*)env->buffer_end) ? *(buf)++ : 0x100) + FAXPP_Error FAXPP_sniff_encoding(FAXPP_Tokenizer *env) { @@ -41,13 +43,13 @@ /* printf("First bytes: %02X %02X %02X %02X\n", *buf, *(buf + 1), */ /* *(buf + 2), *(buf + 3)); */ - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x3C: /* 00 00 00 3C UCS-4, big-endian machine (1234 order) */ #ifdef WORDS_BIGENDIAN @@ -59,14 +61,14 @@ } break; case 0x3C: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 00 00 3C 00 UCS-4, unusual octet order (2143) */ return UNSUPPORTED_ENCODING; } break; case 0xFE: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFF: /* 00 00 FE FF UCS-4, big-endian machine (1234 order) */ #ifdef WORDS_BIGENDIAN @@ -80,7 +82,7 @@ } break; case 0xFF: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFE: /* 00 00 FF FE UCS-4, unusual octet order (2143) */ return UNSUPPORTED_ENCODING; @@ -89,9 +91,9 @@ } break; case 0x3C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 00 3C 00 00 UCS-4, unusual octet order (3412) */ return UNSUPPORTED_ENCODING; @@ -110,11 +112,11 @@ } break; case 0x3C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 3C 00 00 00 UCS-4, little-endian machine (4321 order) */ #ifdef 
WORDS_BIGENDIAN @@ -126,7 +128,7 @@ } break; case 0x3F: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 3C 00 3F 00 UTF-16, little-endian */ #ifdef WORDS_BIGENDIAN @@ -140,9 +142,9 @@ } break; case 0x3F: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x78: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x6D: /* 3C 3F 78 6D UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, etc. */ FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode); @@ -154,11 +156,11 @@ } break; case 0x4C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x6F: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xA7: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x94: /* 4C 6F A7 94 EBCDIC */ return UNSUPPORTED_ENCODING; @@ -169,9 +171,9 @@ } break; case 0xEF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xBB: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xBF: /* EF BB BF UTF-8 with byte order mark */ FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode); @@ -182,11 +184,11 @@ } break; case 0xFE: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* FE FF 00 00 UCS-4, unusual octet order (3412) */ return UNSUPPORTED_ENCODING; @@ -217,11 +219,11 @@ } break; case 0xFF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFE: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* FF FE 00 00 UCS-4, little-endian machine (4321 order) */ #ifdef WORDS_BIGENDIAN @@ -389,6 +391,7 @@ env->column = 0; env->nesting_level = 0; + env->elemdecl_content_level = 0; env->do_encode = 1; env->seen_doctype = 0; @@ -398,6 +401,7 @@ env->element_entity = 0; env->attr_entity = 0; env->internal_dtd_entity = 0; + env->external_dtd_entity = 0; env->external_parsed_entity = 0; env->start_of_entity = 0; @@ -463,6 +467,7 @@ 
env->element_entity = state == ELEMENT_CONTENT_ENTITY; env->attr_entity = state == ATTRIBUTE_VALUE_ENTITY; env->internal_dtd_entity = state == INTERNAL_DTD_ENTITY; + env->external_dtd_entity = state == EXTERNAL_DTD_ENTITY; env->external_parsed_entity = state == EXTERNAL_PARSED_ENTITY; env->external_subset = state == EXTERNAL_SUBSET_ENTITY; @@ -483,6 +488,9 @@ case INTERNAL_DTD_ENTITY: env->state = internal_subset_state_en; break; + case EXTERNAL_DTD_ENTITY: + env->state = external_subset_state; + break; case EXTERNAL_PARSED_ENTITY: case EXTERNAL_SUBSET_ENTITY: env->state = initial_state; @@ -502,17 +510,21 @@ FAXPP_Error FAXPP_pop_tokenizer(FAXPP_Tokenizer **list) { + FAXPP_Error err = NO_ERROR; FAXPP_TokenizerEnv *env = *list; *list = env->prev; if(env->start_of_entity) { - if(env->stored_state != 0 || env->nesting_level != 0 || + if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 || (env->element_entity && env->state != parsed_entity_state && env->state != default_element_content_rsquare_state1 && env->state != default_element_content_rsquare_state2) || - (env->internal_dtd_entity && env->state != internal_subset_state_en) + (env->internal_dtd_entity && env->state != internal_subset_state_en) || + (env->external_dtd_entity && env->state != external_subset_state && + env->state != external_subset_seen_rsquare_state1 && + env->state != external_subset_seen_rsquare_state2) ) { - return INCOMPLETE_MARKUP_IN_ENTITY_VALUE; + err = INCOMPLETE_MARKUP_IN_ENTITY_VALUE; } } else { @@ -532,6 +544,7 @@ } (*list)->nesting_level += env->nesting_level; + (*list)->elemdecl_content_level += env->elemdecl_content_level; (*list)->state = env->state; (*list)->stored_state = env->stored_state; @@ -541,7 +554,7 @@ free_tokenizer_internal(env); - return NO_ERROR; + return err; } FAXPP_Error Modified: trunk/faxpp/src/xml_tokenizer.h =================================================================== --- trunk/faxpp/src/xml_tokenizer.h 2008-03-13 
21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_tokenizer.h 2008-03-14 15:24:54 UTC (rev 43) @@ -51,6 +51,7 @@ unsigned int column; unsigned int nesting_level; + unsigned int elemdecl_content_level; unsigned int do_encode:1; unsigned int buffer_done:1; @@ -61,6 +62,7 @@ unsigned int element_entity:1; unsigned int attr_entity:1; unsigned int internal_dtd_entity:1; + unsigned int external_dtd_entity:1; unsigned int external_parsed_entity:1; unsigned int normalize_attrs:1; @@ -97,15 +99,14 @@ struct FAXPP_TokenizerEnv_s *prev; }; -#define INTERNAL_DIFF 5 - // The first two values are the same as the values in FAXPP_EntityType typedef enum { EXTERNAL_PARSED_ENTITY2 = EXTERNAL_PARSED_ENTITY, EXTERNAL_SUBSET_ENTITY2 = EXTERNAL_SUBSET_ENTITY, - ELEMENT_CONTENT_ENTITY = EXTERNAL_PARSED_ENTITY + INTERNAL_DIFF, - INTERNAL_DTD_ENTITY = EXTERNAL_SUBSET_ENTITY + INTERNAL_DIFF, + ELEMENT_CONTENT_ENTITY, + INTERNAL_DTD_ENTITY, + EXTERNAL_DTD_ENTITY, ATTRIBUTE_VALUE_ENTITY } FAXPP_EntityParseState; Modified: trunk/faxpp/tests/xmlconf_runner.c =================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-14 15:24:54 UTC (rev 43) @@ -29,7 +29,7 @@ if(line != 0) { output_text(FAXPP_get_base_uri(parser), stderr); - fprintf(stderr, ":%03d:%03d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); + fprintf(stderr, ":%d:%d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } else { fprintf(stderr, "FAXPP_Error: %s\n", FAXPP_err_to_string(err)); } @@ -76,8 +76,11 @@ *ptr = 0; } -FAXPP_Error run_test_case(const char *filename, unsigned int *errLine) +FAXPP_Error run_test_case(const char *filename, char *errFileBuffer, unsigned int bufLen, unsigned int *errLine, unsigned int *errColumn) { + const FAXPP_Text *text; + unsigned int len; + FAXPP_Parser *testparser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, 
FAXPP_utf8_transcoder); FILE *file = fopen(filename, "r"); @@ -100,7 +103,13 @@ } if(err != NO_ERROR) { + text = FAXPP_get_base_uri(testparser); + len = text->len < bufLen - 1 ? text->len : bufLen - 1; + memcpy(errFileBuffer, text->ptr, len); + errFileBuffer[len] = 0; + *errLine = FAXPP_get_error_line(testparser); + *errColumn = FAXPP_get_error_column(testparser); } fclose(file); @@ -118,7 +127,9 @@ char base_buffer[1024]; char file_buffer[1024]; FAXPP_Error result; + char errFileBuffer[1024]; unsigned int errLine; + unsigned int errColumn; int output_events = 0; int test_failures = 0; @@ -191,7 +202,7 @@ attr = find_attribute(event, "URI"); calculateBase(base_buffer, &attr->value, file_buffer); - result = run_test_case(file_buffer, &errLine); + result = run_test_case(file_buffer, errFileBuffer, sizeof(errFileBuffer), &errLine, &errColumn); // Skip tests that require no namespaces attr = find_attribute(event, "NAMESPACE"); @@ -251,7 +262,7 @@ } if(result != NO_ERROR) { - fprintf(stderr, "\nError: %s:%i", FAXPP_err_to_string(result), errLine); + fprintf(stderr, "\nError: %s:%d:%d %s\n", errFileBuffer, errLine, errColumn, FAXPP_err_to_string(result)); } fprintf(stderr, "\n"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |