[Faxpp-devel] SF.net SVN: faxpp: [47] trunk/faxpp
Status: Beta
Brought to you by: jpcs
From: <jp...@us...> - 2008-03-20 01:56:14
|
Revision: 47 http://faxpp.svn.sourceforge.net/faxpp/?rev=47&view=rev Author: jpcs Date: 2008-03-19 18:56:21 -0700 (Wed, 19 Mar 2008) Log Message: ----------- Added recognition of parameter entities in entity and notation declarations. Raise errors for redeclaration of "xml" and "xmlns" namespaces. Validate the value of "xml:space" attributes. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/TODO trunk/faxpp/include/faxpp/error.h trunk/faxpp/src/doctype.c trunk/faxpp/src/entitydecl.c trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/src/system_public_states.h Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/Makefile.am 2008-03-20 01:56:21 UTC (rev 47) @@ -37,7 +37,8 @@ src/attlistdecl.c \ src/notationdecl.c \ src/entitydecl.c \ -src/conditional.c +src/conditional.c \ +src/system_public_states.h tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -46,13 +47,17 @@ parser_example_SOURCES = \ examples/parser_example.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h xmlconf_runner_LDADD = libfaxpp.la xmlconf_runner_SOURCES = \ tests/xmlconf_runner.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h EXTRA_DIST = \ docs/Doxyfile.api \ Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/Makefile.in 2008-03-20 01:56:21 UTC (rev 47) @@ -260,7 +260,8 @@ src/attlistdecl.c \ src/notationdecl.c 
\ src/entitydecl.c \ -src/conditional.c +src/conditional.c \ +src/system_public_states.h tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -268,13 +269,17 @@ parser_example_SOURCES = \ examples/parser_example.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h xmlconf_runner_LDADD = libfaxpp.la xmlconf_runner_SOURCES = \ tests/xmlconf_runner.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h EXTRA_DIST = \ docs/Doxyfile.api \ Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/TODO 2008-03-20 01:56:21 UTC (rev 47) @@ -1,17 +1,11 @@ Small tasks ----------- -Don't accept Namespace 1.1 undefines in XML 1.0 mode Normalize line endings in element character content / PI values / comment values Accept XML 1.1 line endings as whitespace -Handle "xml" namespace properly -xml:space value checking -Error for redefining "xml" namespace -Error for defining "xmlns" namespace -Parse element decls correctly -Parse parameter entities in markup correctly Large tasks ----------- +Attribute default values DTD validation Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/include/faxpp/error.h 2008-03-20 01:56:21 UTC (rev 47) @@ -75,7 +75,9 @@ ELEMENT_NAME_MISMATCH, NO_URI_FOR_PREFIX, - DUPLICATE_ATTRIBUTES + DUPLICATE_ATTRIBUTES, + INVALID_NAMESPACE_DECLARATION, + INVALID_XMLSPACE_VALUE } FAXPP_Error; /** Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/doctype.c 2008-03-20 
01:56:21 UTC (rev 47) @@ -198,12 +198,10 @@ WHITESPACE: break; case 'S': - env->stored_state = doctype_internal_subset_start_state; - env->state = system_id_initial_state1; + env->state = doctype_system_id_initial_state1; break; case 'P': - env->stored_state = doctype_internal_subset_start_state; - env->state = public_id_initial_state1; + env->state = doctype_public_id_initial_state1; break; case '[': env->state = internal_subset_state; @@ -223,292 +221,15 @@ return NO_ERROR; } -SINGLE_CHAR_STATE(system_id_initial_state1, 'Y', 0, system_id_initial_state2, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state2, 'S', 0, system_id_initial_state3, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state3, 'T', 0, system_id_initial_state4, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state4, 'E', 0, system_id_initial_state5, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state5, 'M', 0, system_id_ws_state, INVALID_SYSTEM_ID) +#define PREFIX(name) doctype_ ## name +#define END_STATE doctype_internal_subset_start_state -FAXPP_Error -system_id_ws_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); +#include "system_public_states.h" - switch(env->current_char) { - WHITESPACE: - env->state = system_literal_start_state; - next_char(env); - break; - default: - env->state = system_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} +#undef END_STATE +#undef PREFIX FAXPP_Error -system_literal_start_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - return NO_ERROR; - case '"': - env->state = system_literal_quot_state; - break; - case '\'': - env->state = system_literal_apos_state; - break; - default: - next_char(env); - return EXPECTING_SYSTEM_LITERAL; - } - next_char(env); - token_start_position(env); - return NO_ERROR; -} - -FAXPP_Error -system_literal_apos_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { 
- case '\'': - retrieve_state(env); - token_end_position(env); - report_token(SYSTEM_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - LINE_ENDINGS - default: - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { - next_char(env); - return RESTRICTED_CHAR; - } - break; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -system_literal_quot_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '"': - retrieve_state(env); - token_end_position(env); - report_token(SYSTEM_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - LINE_ENDINGS - default: - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { - next_char(env); - return RESTRICTED_CHAR; - } - break; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -SINGLE_CHAR_STATE(public_id_initial_state1, 'U', 0, public_id_initial_state2, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state2, 'B', 0, public_id_initial_state3, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state3, 'L', 0, public_id_initial_state4, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state4, 'I', 0, public_id_initial_state5, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state5, 'C', 0, public_id_ws_state, INVALID_PUBLIC_ID) - -FAXPP_Error -public_id_ws_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - env->state = pubid_literal_start_state; - next_char(env); - break; - default: - env->state = pubid_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_start_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - return NO_ERROR; - case '"': - env->state = pubid_literal_quot_state; - break; - case '\'': - env->state = pubid_literal_apos_state; - break; - default: - next_char(env); - 
return EXPECTING_PUBID_LITERAL; - } - next_char(env); - token_start_position(env); - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_apos_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '\'': - env->state = public_id_ws_state2; - token_end_position(env); - report_token(PUBID_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] - LINE_ENDINGS - // A-Z - case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: - // a-z - case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: - // 0-9 - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': - case '9': - case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': - case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': - case '%': - // Valid PubidChar - break; - default: - next_char(env); - return INVALID_CHAR_IN_PUBID_LITERAL; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_quot_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '"': - env->state = public_id_ws_state2; - token_end_position(env); - report_token(PUBID_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] - LINE_ENDINGS - // A-Z - case 0x41: case 0x42: case 0x43: case 
0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: - // a-z - case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: - // 0-9 - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': - case '9': - case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': - case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': - case '%': case '\'': - // Valid PubidChar - break; - default: - next_char(env); - return INVALID_CHAR_IN_PUBID_LITERAL; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -public_id_ws_state2(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - env->state = public_id_ws_state3; - next_char(env); - break; - case '>': - if(env->stored_state == notationdecl_end_state) { - // Notation decls can skip the system literal - retrieve_state(env); - return NO_ERROR; - } - // Fall through - default: - env->state = system_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} - -FAXPP_Error -public_id_ws_state3(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - break; - case '>': - if(env->stored_state == notationdecl_end_state) { - // Notation decls can skip the system literal - retrieve_state(env); - return NO_ERROR; - } - // Fall through - default: - env->state = system_literal_start_state; - break; - } - return NO_ERROR; -} - -FAXPP_Error 
doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/entitydecl.c =================================================================== --- trunk/faxpp/src/entitydecl.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/entitydecl.c 2008-03-20 01:56:21 UTC (rev 47) @@ -111,12 +111,10 @@ token_start_position(env); return NO_ERROR; case 'S': - env->stored_state = entitydecl_ws_state; - env->state = system_id_initial_state1; + env->state = entitydecl_system_id_initial_state1; break; case 'P': - env->stored_state = entitydecl_ws_state; - env->state = public_id_initial_state1; + env->state = entitydecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -127,6 +125,16 @@ return NO_ERROR; } +#define PREFIX(name) entitydecl_ ## name +#define END_STATE entitydecl_ws_state +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef END_STATE +#undef PREFIX + FAXPP_Error entitydecl_value_apos_state(FAXPP_TokenizerEnv *env) { @@ -422,12 +430,10 @@ token_start_position(env); return NO_ERROR; case 'S': - env->stored_state = paramentitydecl_end_state; - env->state = system_id_initial_state1; + env->state = paramentitydecl_system_id_initial_state1; break; case 'P': - env->stored_state = paramentitydecl_end_state; - env->state = public_id_initial_state1; + env->state = paramentitydecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -438,6 +444,16 @@ return NO_ERROR; } +#define PREFIX(name) paramentitydecl_ ## name +#define END_STATE paramentitydecl_end_state +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef END_STATE +#undef PREFIX + FAXPP_Error paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env) { Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/error.c 
2008-03-20 01:56:21 UTC (rev 47) @@ -137,6 +137,10 @@ return "IMPROPER_NESTING_OF_ENTITY"; case PARAMETER_ENTITY_IN_INTERNAL_SUBSET: return "PARAMETER_ENTITY_IN_INTERNAL_SUBSET"; + case INVALID_NAMESPACE_DECLARATION: + return "INVALID_NAMESPACE_DECLARATION"; + case INVALID_XMLSPACE_VALUE: + return "INVALID_XMLSPACE_VALUE"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/notationdecl.c 2008-03-20 01:56:21 UTC (rev 47) @@ -97,12 +97,10 @@ switch(env->current_char) { case 'S': - env->stored_state = notationdecl_end_state; - env->state = system_id_initial_state1; + env->state = notationdecl_system_id_initial_state1; break; case 'P': - env->stored_state = notationdecl_end_state; - env->state = public_id_initial_state1; + env->state = notationdecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -113,6 +111,18 @@ return NO_ERROR; } +#define PREFIX(name) notationdecl_ ## name +#define END_STATE notationdecl_end_state +#define SKIP_SYSTEM_LITERAL +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef SKIP_SYSTEM_LITERAL +#undef END_STATE +#undef PREFIX + FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env) { Added: trunk/faxpp/src/system_public_states.h =================================================================== --- trunk/faxpp/src/system_public_states.h (rev 0) +++ trunk/faxpp/src/system_public_states.h 2008-03-20 01:56:21 UTC (rev 47) @@ -0,0 +1,392 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file needs to have a number of macros defined before it is included +// PREFIX(name) +// END_STATE +// SKIP_SYSTEM_LITERAL +// ALLOW_PARAMETER_ENTITIES + +#define SP_SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state1), 'Y', 0, PREFIX(system_id_initial_state2), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state2), 'S', 0, PREFIX(system_id_initial_state3), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state3), 'T', 0, PREFIX(system_id_initial_state4), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state4), 'E', 0, PREFIX(system_id_initial_state5), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state5), 'M', 0, PREFIX(system_id_ws_state), INVALID_SYSTEM_ID) + +FAXPP_Error +PREFIX(system_id_ws_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(system_literal_start_state); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); 
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + default: + env->state = PREFIX(system_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_start_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + return NO_ERROR; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + case '"': + env->state = PREFIX(system_literal_quot_state); + break; + case '\'': + env->state = PREFIX(system_literal_apos_state); + break; + default: + next_char(env); + return EXPECTING_SYSTEM_LITERAL; + } + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_apos_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = END_STATE; + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + case '#': + next_char(env); + return INVALID_SYSTEM_ID; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_quot_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '"': + env->state = END_STATE; + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + case '#': + next_char(env); + return INVALID_SYSTEM_ID; + 
LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state1), 'U', 0, PREFIX(public_id_initial_state2), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state2), 'B', 0, PREFIX(public_id_initial_state3), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state3), 'L', 0, PREFIX(public_id_initial_state4), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state4), 'I', 0, PREFIX(public_id_initial_state5), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state5), 'C', 0, PREFIX(public_id_ws_state), INVALID_PUBLIC_ID) + +FAXPP_Error +PREFIX(public_id_ws_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(pubid_literal_start_state); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + default: + env->state = PREFIX(pubid_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_start_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + return NO_ERROR; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + case '"': + 
env->state = PREFIX(pubid_literal_quot_state); + break; + case '\'': + env->state = PREFIX(pubid_literal_apos_state); + break; + default: + next_char(env); + return EXPECTING_PUBID_LITERAL; + } + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_apos_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = PREFIX(public_id_ws_state2); + token_end_position(env); + report_token(PUBID_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + LINE_ENDINGS + // A-Z + case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: + // a-z + case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': + case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': + case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': + case '%': + // Valid PubidChar + break; + default: + next_char(env); + return INVALID_CHAR_IN_PUBID_LITERAL; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_quot_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '"': + env->state = PREFIX(public_id_ws_state2); + token_end_position(env); + 
report_token(PUBID_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + LINE_ENDINGS + // A-Z + case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: + // a-z + case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': + case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': + case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': + case '%': case '\'': + // Valid PubidChar + break; + default: + next_char(env); + return INVALID_CHAR_IN_PUBID_LITERAL; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(public_id_ws_state2)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(public_id_ws_state3); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif +#ifdef SKIP_SYSTEM_LITERAL + case '>': + // Notation decls can skip the system literal + env->state = END_STATE; + return NO_ERROR; +#endif + default: + env->state = 
PREFIX(system_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(public_id_ws_state3)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif +#ifdef SKIP_SYSTEM_LITERAL + case '>': + // Notation decls can skip the system literal + env->state = END_STATE; + return NO_ERROR; +#endif + default: + env->state = PREFIX(system_literal_start_state); + break; + } + return NO_ERROR; +} + +#undef SP_SINGLE_CHAR_STATE Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-20 01:56:21 UTC (rev 47) @@ -450,47 +450,47 @@ else if(state == external_subset_decl_state) return "external_subset_decl_state"; - else if(state == system_id_initial_state1) - return "system_id_initial_state1"; - else if(state == system_id_initial_state2) - return "system_id_initial_state2"; - else if(state == system_id_initial_state3) - return "system_id_initial_state3"; - else if(state == system_id_initial_state4) - return "system_id_initial_state4"; - else if(state == system_id_initial_state5) - return "system_id_initial_state5"; - else if(state == system_id_ws_state) - return "system_id_ws_state"; - else if(state == system_literal_start_state) - return "system_literal_start_state"; - else if(state == system_literal_apos_state) - return "system_literal_apos_state"; - else if(state == system_literal_quot_state) - return "system_literal_quot_state"; + else if(state == 
doctype_system_id_initial_state1) + return "doctype_system_id_initial_state1"; + else if(state == doctype_system_id_initial_state2) + return "doctype_system_id_initial_state2"; + else if(state == doctype_system_id_initial_state3) + return "doctype_system_id_initial_state3"; + else if(state == doctype_system_id_initial_state4) + return "doctype_system_id_initial_state4"; + else if(state == doctype_system_id_initial_state5) + return "doctype_system_id_initial_state5"; + else if(state == doctype_system_id_ws_state) + return "doctype_system_id_ws_state"; + else if(state == doctype_system_literal_start_state) + return "doctype_system_literal_start_state"; + else if(state == doctype_system_literal_apos_state) + return "doctype_system_literal_apos_state"; + else if(state == doctype_system_literal_quot_state) + return "doctype_system_literal_quot_state"; - else if(state == public_id_initial_state1) - return "public_id_initial_state1"; - else if(state == public_id_initial_state2) - return "public_id_initial_state2"; - else if(state == public_id_initial_state3) - return "public_id_initial_state3"; - else if(state == public_id_initial_state4) - return "public_id_initial_state4"; - else if(state == public_id_initial_state5) - return "public_id_initial_state5"; - else if(state == public_id_ws_state) - return "public_id_ws_state"; - else if(state == pubid_literal_start_state) - return "pubid_literal_start_state"; - else if(state == pubid_literal_apos_state) - return "pubid_literal_apos_state"; - else if(state == pubid_literal_quot_state) - return "pubid_literal_quot_state"; - else if(state == public_id_ws_state2) - return "public_id_ws_state2"; - else if(state == public_id_ws_state3) - return "public_id_ws_state3"; + else if(state == doctype_public_id_initial_state1) + return "doctype_public_id_initial_state1"; + else if(state == doctype_public_id_initial_state2) + return "doctype_public_id_initial_state2"; + else if(state == doctype_public_id_initial_state3) + return 
"doctype_public_id_initial_state3"; + else if(state == doctype_public_id_initial_state4) + return "doctype_public_id_initial_state4"; + else if(state == doctype_public_id_initial_state5) + return "doctype_public_id_initial_state5"; + else if(state == doctype_public_id_ws_state) + return "doctype_public_id_ws_state"; + else if(state == doctype_pubid_literal_start_state) + return "doctype_pubid_literal_start_state"; + else if(state == doctype_pubid_literal_apos_state) + return "doctype_pubid_literal_apos_state"; + else if(state == doctype_pubid_literal_quot_state) + return "doctype_pubid_literal_quot_state"; + else if(state == doctype_public_id_ws_state2) + return "doctype_public_id_ws_state2"; + else if(state == doctype_public_id_ws_state3) + return "doctype_public_id_ws_state3"; else if(state == elementdecl_or_entitydecl_state) return "elementdecl_or_entitydecl_state"; @@ -797,6 +797,47 @@ else if(state == notationdecl_end_state) return "notationdecl_end_state"; + else if(state == notationdecl_system_id_initial_state1) + return "notationdecl_system_id_initial_state1"; + else if(state == notationdecl_system_id_initial_state2) + return "notationdecl_system_id_initial_state2"; + else if(state == notationdecl_system_id_initial_state3) + return "notationdecl_system_id_initial_state3"; + else if(state == notationdecl_system_id_initial_state4) + return "notationdecl_system_id_initial_state4"; + else if(state == notationdecl_system_id_initial_state5) + return "notationdecl_system_id_initial_state5"; + else if(state == notationdecl_system_id_ws_state) + return "notationdecl_system_id_ws_state"; + else if(state == notationdecl_system_literal_start_state) + return "notationdecl_system_literal_start_state"; + else if(state == notationdecl_system_literal_apos_state) + return "notationdecl_system_literal_apos_state"; + else if(state == notationdecl_system_literal_quot_state) + return "notationdecl_system_literal_quot_state"; + else if(state == 
notationdecl_public_id_initial_state1) + return "notationdecl_public_id_initial_state1"; + else if(state == notationdecl_public_id_initial_state2) + return "notationdecl_public_id_initial_state2"; + else if(state == notationdecl_public_id_initial_state3) + return "notationdecl_public_id_initial_state3"; + else if(state == notationdecl_public_id_initial_state4) + return "notationdecl_public_id_initial_state4"; + else if(state == notationdecl_public_id_initial_state5) + return "notationdecl_public_id_initial_state5"; + else if(state == notationdecl_public_id_ws_state) + return "notationdecl_public_id_ws_state"; + else if(state == notationdecl_pubid_literal_start_state) + return "notationdecl_pubid_literal_start_state"; + else if(state == notationdecl_pubid_literal_apos_state) + return "notationdecl_pubid_literal_apos_state"; + else if(state == notationdecl_pubid_literal_quot_state) + return "notationdecl_pubid_literal_quot_state"; + else if(state == notationdecl_public_id_ws_state2) + return "notationdecl_public_id_ws_state2"; + else if(state == notationdecl_public_id_ws_state3) + return "notationdecl_public_id_ws_state3"; + else if(state == entitydecl_initial_state1) return "entitydecl_initial_state1"; else if(state == entitydecl_initial_state1) @@ -837,6 +878,47 @@ else if(state == entitydecl_end_state) return "entitydecl_end_state"; + else if(state == entitydecl_system_id_initial_state1) + return "entitydecl_system_id_initial_state1"; + else if(state == entitydecl_system_id_initial_state2) + return "entitydecl_system_id_initial_state2"; + else if(state == entitydecl_system_id_initial_state3) + return "entitydecl_system_id_initial_state3"; + else if(state == entitydecl_system_id_initial_state4) + return "entitydecl_system_id_initial_state4"; + else if(state == entitydecl_system_id_initial_state5) + return "entitydecl_system_id_initial_state5"; + else if(state == entitydecl_system_id_ws_state) + return "entitydecl_system_id_ws_state"; + else if(state == 
entitydecl_system_literal_start_state) + return "entitydecl_system_literal_start_state"; + else if(state == entitydecl_system_literal_apos_state) + return "entitydecl_system_literal_apos_state"; + else if(state == entitydecl_system_literal_quot_state) + return "entitydecl_system_literal_quot_state"; + else if(state == entitydecl_public_id_initial_state1) + return "entitydecl_public_id_initial_state1"; + else if(state == entitydecl_public_id_initial_state2) + return "entitydecl_public_id_initial_state2"; + else if(state == entitydecl_public_id_initial_state3) + return "entitydecl_public_id_initial_state3"; + else if(state == entitydecl_public_id_initial_state4) + return "entitydecl_public_id_initial_state4"; + else if(state == entitydecl_public_id_initial_state5) + return "entitydecl_public_id_initial_state5"; + else if(state == entitydecl_public_id_ws_state) + return "entitydecl_public_id_ws_state"; + else if(state == entitydecl_pubid_literal_start_state) + return "entitydecl_pubid_literal_start_state"; + else if(state == entitydecl_pubid_literal_apos_state) + return "entitydecl_pubid_literal_apos_state"; + else if(state == entitydecl_pubid_literal_quot_state) + return "entitydecl_pubid_literal_quot_state"; + else if(state == entitydecl_public_id_ws_state2) + return "entitydecl_public_id_ws_state2"; + else if(state == entitydecl_public_id_ws_state3) + return "entitydecl_public_id_ws_state3"; + else if(state == paramentitydecl_name_state1) return "paramentitydecl_name_state1"; else if(state == paramentitydecl_name_state2) @@ -850,6 +932,47 @@ else if(state == paramentitydecl_end_state) return "paramentitydecl_end_state"; + else if(state == paramentitydecl_system_id_initial_state1) + return "paramentitydecl_system_id_initial_state1"; + else if(state == paramentitydecl_system_id_initial_state2) + return "paramentitydecl_system_id_initial_state2"; + else if(state == paramentitydecl_system_id_initial_state3) + return "paramentitydecl_system_id_initial_state3"; + else 
if(state == paramentitydecl_system_id_initial_state4) + return "paramentitydecl_system_id_initial_state4"; + else if(state == paramentitydecl_system_id_initial_state5) + return "paramentitydecl_system_id_initial_state5"; + else if(state == paramentitydecl_system_id_ws_state) + return "paramentitydecl_system_id_ws_state"; + else if(state == paramentitydecl_system_literal_start_state) + return "paramentitydecl_system_literal_start_state"; + else if(state == paramentitydecl_system_literal_apos_state) + return "paramentitydecl_system_literal_apos_state"; + else if(state == paramentitydecl_system_literal_quot_state) + return "paramentitydecl_system_literal_quot_state"; + else if(state == paramentitydecl_public_id_initial_state1) + return "paramentitydecl_public_id_initial_state1"; + else if(state == paramentitydecl_public_id_initial_state2) + return "paramentitydecl_public_id_initial_state2"; + else if(state == paramentitydecl_public_id_initial_state3) + return "paramentitydecl_public_id_initial_state3"; + else if(state == paramentitydecl_public_id_initial_state4) + return "paramentitydecl_public_id_initial_state4"; + else if(state == paramentitydecl_public_id_initial_state5) + return "paramentitydecl_public_id_initial_state5"; + else if(state == paramentitydecl_public_id_ws_state) + return "paramentitydecl_public_id_ws_state"; + else if(state == paramentitydecl_pubid_literal_start_state) + return "paramentitydecl_pubid_literal_start_state"; + else if(state == paramentitydecl_pubid_literal_apos_state) + return "paramentitydecl_pubid_literal_apos_state"; + else if(state == paramentitydecl_pubid_literal_quot_state) + return "paramentitydecl_pubid_literal_quot_state"; + else if(state == paramentitydecl_public_id_ws_state2) + return "paramentitydecl_public_id_ws_state2"; + else if(state == paramentitydecl_public_id_ws_state3) + return "paramentitydecl_public_id_ws_state3"; + else if(state == conditional_ws_state) return "conditional_ws_state"; else if(state == 
conditional_state1) Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-20 01:56:21 UTC (rev 47) @@ -274,27 +274,27 @@ FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state3(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state4(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_ws_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_start_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_apos_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_quot_state(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state1(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state3(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state4(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error 
public_id_ws_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_start_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_ws_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_ws_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); @@ -449,6 +449,28 @@ FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error 
notationdecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error notationdecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env); @@ -469,6 +491,28 @@ FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + 
+FAXPP_Error entitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env); @@ -476,6 +520,28 @@ FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error paramentitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error 
paramentitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error conditional_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env); Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/xml_parser.c 2008-03-20 01:56:21 UTC (rev 47) @@ -405,7 +405,7 @@ { uri->ptr = 0; uri->len = 0; - return p_find_ns_info(parser, prefix, uri); + return p_find_ns_info_impl(parser, prefix, uri); } unsigned int FAXPP_get_nesting_level(const FAXPP_Parser *parser) @@ -677,17 +677,6 @@ } \ } -#define p_copy_text_from_attr_value(text, attrval, env, buffer) \ -{ \ - if((attrval)->next == 0) { \ - p_copy_text_from_event((text), &(attrval)->value, (env), (buffer)); \ - } \ - else { \ - FAXPP_Error err = p_normalize_attr_value((text), (buffer), (attrval), (env)); \ - if(err != NO_ERROR) return err; \ - } \ -} - FAXPP_Error p_normalize_attr_value(FAXPP_Text *text, FAXPP_Buffer *buffer, const FAXPP_AttrValue *value, const FAXPP_ParserEnv *env) { FAXPP_Error err; @@ -1032,6 +1021,30 @@ return text_ptr == text_end; } +static int p_equals(const char *str, 
FAXPP_EncodeFunction encode, const FAXPP_Text *text) +{ + // No encoding represents a character with as many as 10 bytes + uint8_t encode_buffer[10]; + unsigned int encode_len; + + void *text_ptr = text->ptr; + void *text_end = text_ptr + text->len; + + while(*str != 0) { + if(text_ptr >= text_end) return 0; + + encode_len = encode(encode_buffer, encode_buffer + sizeof(encode_buffer), *str); + if((text_end - text_ptr) < encode_len || memcmp(encode_buffer, text_ptr, encode_len) != 0) { + return 0; + } + + text_ptr += encode_len; + ++str; + } + + return text_ptr == text_end; +} + static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env) { FAXPP_NextEvent next; @@ -1048,7 +1061,7 @@ p_copy_text_from_token(&env->event.version, env, /*useTokenBuffer*/0); p_set_event_location_from_token(env); - if(p_case_insensitive_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) { + if(p_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) { if(env->xml_version == XML_VERSION_NOT_KNOWN) { env->xml_version = XML_VERSION_1_1; } @@ -1069,7 +1082,7 @@ case XML_DECL_STANDALONE_TOKEN: p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0); - if(p_case_insensitive_equals("YES", env->tenv->transcoder.encode, &env->event.standalone)) { + if(p_equals("yes", env->tenv->transcoder.encode, &env->event.standalone)) { env->standalone = 1; } break; @@ -1605,6 +1618,7 @@ err = p_parse_entity(env, ent, ATTRIBUTE_VALUE_ENTITY); if(err) goto error; + return NO_ERROR; } break; case PE_REFERENCE_TOKEN: @@ -1639,6 +1653,7 @@ p_set_text_from_text(&env->event.public_id, &bkup_public); if(err) goto error; + return NO_ERROR; } break; case PE_REFERENCE_IN_MARKUP_TOKEN: @@ -1660,7 +1675,7 @@ p_set_text_from_text(&env->event.public_id, &bkup_public); if(err) goto error; - break; + return NO_ERROR; case ELEMENTDECL_LPAR_TOKEN: cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec)); @@ -2045,6 +2060,7 @@ err = p_parse_entity(env, ent, 
ATTRIBUTE_VALUE_ENTITY); if(err) goto error; + return NO_ERROR; } else { p_reset_event(env); @@ -2119,10 +2135,36 @@ return err; } +static const char *xml_prefix = "xml"; +static const char *xmlns_prefix = "xmlns"; +static const char *xml_uri = "http://www.w3.org/XML/1998/namespace"; +static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/"; + static FAXPP_Error p_add_ns_info(FAXPP_ParserEnv *env, const FAXPP_Attribute *attr) { - FAXPP_NamespaceInfo *nsinfo = env->namespace_pool; + FAXPP_NamespaceInfo *nsinfo; + // Check for invalid "xml" or "xmlns" namespace declarations + if(attr->prefix.len != 0) { + if(p_equals(xmlns_prefix, env->tenv->transcoder.encode, &attr->name)) + return INVALID_NAMESPACE_DECLARATION; + if(p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name) && + !p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + } + + if((attr->prefix.len == 0 || !p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name)) && + p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + + if(p_equals(xmlns_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + + if(env->tenv->xml_char == CHAR10 && attr->prefix.len != 0 && attr->value.value.len == 0) + return INVALID_NAMESPACE_DECLARATION; + + // Add the namespace binding + nsinfo = env->namespace_pool; if(nsinfo == 0) { nsinfo = (FAXPP_NamespaceInfo*)malloc(sizeof(FAXPP_NamespaceInfo)); if(!nsinfo) return OUT_OF_MEMORY; @@ -2136,7 +2178,7 @@ env->namespace_stack = nsinfo; env->element_info_stack->ns = nsinfo; - p_copy_text_from_attr_value(&nsinfo->uri, &attr->value, env, &env->element_info_stack->buffer); + p_copy_text_from_event(&nsinfo->uri, &attr->value.value, env, &env->element_info_stack->buffer); if(attr->prefix.len != 0) { p_copy_text_from_event(&nsinfo->prefix, &attr->name, env, &env->element_info_stack->buffer); @@ -2166,6 +2208,10 @@ } 
// The prefix "xml" is always bound to the namespace URI "http://www.w3.org/XML/1998/namespace" + if(p_equals(xml_prefix, env->tenv->transcoder.encode, prefix)) { + p_copy_text_from_str(uri, &((FAXPP_ParserEnv*)env)->event_buffer, (FAXPP_ParserEnv*)env, xml_uri); + return NO_ERROR; + } return NO_URI_FOR_PREFIX; } @@ -2245,9 +2291,6 @@ env->element_info_pool = einfo; } -static const char *xml_uri = "http://www.w3.org/XML/1998/namespace"; -static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/"; - static FAXPP_Error wf_next_event(FAXPP_ParserEnv *env) { int i, j; @@ -2271,8 +2314,9 @@ attr = &env->event.attrs[i]; /* Normalize the attribute values if required */ - if(env->tenv->normalize_attrs && - (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0)) { + if(attr->xmlns_attr || attr->xml_attr || + (env->tenv->normalize_attrs && + (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0))) { err = p_normalize_attr_value(&tmpText, &env->event_buffer, &attr->value, env); if(err != 0) return err; @@ -2287,7 +2331,7 @@ /* Check for namespace attributes */ if(attr->xmlns_attr) { err = p_add_ns_info(env, attr); - if(err != 0) { + if(err) { set_err_info_from_attr(env, attr); return err; } @@ -2313,6 +2357,13 @@ } else if(attr->xml_attr) { p_copy_text_from_str(&attr->uri, &env->event_buffer, env, xml_uri); + + if(p_equals("space", env->tenv->transcoder.encode, &attr->name) && + !p_equals("preserve", env->tenv->transcoder.encode, &attr->value.value) && + !p_equals("default", env->tenv->transcoder.encode, &attr->value.value)) { + set_err_info_from_attr(env, attr); + return INVALID_XMLSPACE_VALUE; + } } else if(attr->prefix.len != 0) { err = p_find_ns_info(env, &attr->prefix, &attr->uri); Modified: trunk/faxpp/tests/xmlconf_runner.c =================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-20 01:56:21 UTC (rev 47) @@ -192,10 
+192,15 @@ else if(text_equal(event->name, "TEST")) { // TBD Check output - jpcs -/* if(find_attribute(event, "OUTPUT")) { */ +/* attr = find_attribute(event, "OUTPUT"); */ +/* if(attr) { */ +/* calculateBase(base_buffer, &attr->value, file_buffer); */ + /* printf("^"); */ +/* printf("\n%s\n", file_buffer); */ /* fflush(stdout); */ /* ++test_skips; */ +/* exit(-1); */ /* break; */ /* } */ @@ -213,11 +218,12 @@ break; } + attr = find_attribute(event, "TYPE"); + // Skip "error" type tests at the moment - since they // probably need detailed inspection to see which ones // ought to pass or fail // TBD enable these tests - jpcs - attr = find_attribute(event, "TYPE"); if(text_equal(attr->value.value, "error")) { printf("^"); fflush(stdout); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |