[Faxpp-devel] SF.net SVN: faxpp: [44] trunk/faxpp
Status: Beta
Brought to you by: jpcs
From: <jp...@us...> - 2008-03-15 10:59:43
Revision: 44
http://faxpp.svn.sourceforge.net/faxpp/?rev=44&view=rev
Author: jpcs
Date: 2008-03-15 03:59:42 -0700 (Sat, 15 Mar 2008)
Log Message:
-----------
Started work on parsing and expanding parameter entity references in
DTD markup.
Modified Paths:
--------------
trunk/faxpp/examples/parser_example.c
trunk/faxpp/include/faxpp/error.h
trunk/faxpp/include/faxpp/parser.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/src/conditional.c
trunk/faxpp/src/doctype.c
trunk/faxpp/src/elementdecl.c
trunk/faxpp/src/error.c
trunk/faxpp/src/reference.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_tokenizer.c
trunk/faxpp/src/xml_tokenizer.h
trunk/faxpp/src/xmldecl.c
Modified: trunk/faxpp/examples/parser_example.c
===================================================================
--- trunk/faxpp/examples/parser_example.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/examples/parser_example.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -76,7 +76,7 @@
}
while((err = FAXPP_next_event(parser)) == 0) {
-/* output_event(FAXPP_get_current_event(parser), stdout); */
+ output_event(FAXPP_get_current_event(parser), stdout);
if(FAXPP_get_current_event(parser)->type == END_DOCUMENT_EVENT)
break;
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/include/faxpp/error.h 2008-03-15 10:59:42 UTC (rev 44)
@@ -69,6 +69,7 @@
INVALID_DEFAULTDECL,
INVALID_ELEMENTDECL_CONTENT,
INVALID_CONDITIONAL_SECTION,
+ IMPROPER_NESTING_OF_ENTITY,
OUT_OF_MEMORY,
ELEMENT_NAME_MISMATCH,
Modified: trunk/faxpp/include/faxpp/parser.h
===================================================================
--- trunk/faxpp/include/faxpp/parser.h 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/include/faxpp/parser.h 2008-03-15 10:59:42 UTC (rev 44)
@@ -92,8 +92,9 @@
/// The type of external entity to parse
typedef enum {
- EXTERNAL_PARSED_ENTITY = 0, ///< An external parsed entity
- EXTERNAL_SUBSET_ENTITY = 1 ///< An external subset (DTD)
+ EXTERNAL_PARSED_ENTITY = 0, ///< An external parsed entity
+ EXTERNAL_SUBSET_ENTITY = 1, ///< An external subset (DTD)
+ EXTERNAL_IN_MARKUP_ENTITY = 2 ///< An external entity inside DTD markup
} FAXPP_EntityType;
/**
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/include/faxpp/token.h 2008-03-15 10:59:42 UTC (rev 44)
@@ -64,6 +64,7 @@
DEC_CHAR_REFERENCE_TOKEN,
HEX_CHAR_REFERENCE_TOKEN,
PE_REFERENCE_TOKEN,
+ PE_REFERENCE_IN_MARKUP_TOKEN,
DOCTYPE_PREFIX_TOKEN,
DOCTYPE_NAME_TOKEN,
Modified: trunk/faxpp/src/conditional.c
===================================================================
--- trunk/faxpp/src/conditional.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/conditional.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -37,6 +37,29 @@
return NO_ERROR; \
}
+FAXPP_Error
+conditional_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ env->state = conditional_state1;
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
SINGLE_CHAR_STATE(conditional_state1, 'I', 0, conditional_state2, INVALID_CONDITIONAL_SECTION)
FAXPP_Error
@@ -47,11 +70,9 @@
switch(env->current_char) {
case 'N':
env->state = include_state1;
- env->nesting_level += 1;
break;
case 'G':
env->state = ignore_state1;
- env->nesting_level += 1;
break;
LINE_ENDINGS
default:
@@ -66,9 +87,28 @@
SINGLE_CHAR_STATE(ignore_state2, 'O', 0, ignore_state3, INVALID_CONDITIONAL_SECTION)
SINGLE_CHAR_STATE(ignore_state3, 'R', 0, ignore_state4, INVALID_CONDITIONAL_SECTION)
SINGLE_CHAR_STATE(ignore_state4, 'E', ignore_state5, ws_state, INVALID_CONDITIONAL_SECTION)
-SINGLE_CHAR_STATE(ignore_state5, '[', 0, ignore_content_state, INVALID_CONDITIONAL_SECTION)
FAXPP_Error
+ignore_state5(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '[':
+ env->ignore_start_level = env->nesting_level;
+ env->nesting_level += 1;
+ env->state = ignore_content_state;
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_CONDITIONAL_SECTION;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
ignore_content_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -169,7 +209,8 @@
break;
case '>':
env->nesting_level -= 1;
- if(env->nesting_level == 0) {
+ if(env->nesting_level == env->ignore_start_level) {
+ env->nesting_level -= 1;
base_state(env);
}
else {
@@ -194,4 +235,23 @@
SINGLE_CHAR_STATE(include_state3, 'U', 0, include_state4, INVALID_CONDITIONAL_SECTION)
SINGLE_CHAR_STATE(include_state4, 'D', 0, include_state5, INVALID_CONDITIONAL_SECTION)
SINGLE_CHAR_STATE(include_state5, 'E', include_state6, ws_state, INVALID_CONDITIONAL_SECTION)
-SINGLE_CHAR_STATE(include_state6, '[', 0, external_subset_state, INVALID_CONDITIONAL_SECTION)
+
+FAXPP_Error
+include_state6(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '[':
+ env->nesting_level += 1;
+ env->state = external_subset_state;
+ next_char(env);
+ break;
+ LINE_ENDINGS
+ default:
+ next_char(env);
+ return INVALID_CONDITIONAL_SECTION;
+ }
+ return NO_ERROR;
+}
+
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/doctype.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -726,7 +726,7 @@
next_char(env);
return INVALID_DTD_DECL;
case '>':
- env->nesting_level -= 1;
+ env->nesting_level -= 2;
base_state(env);
break;
default:
@@ -772,8 +772,8 @@
env->state = comment_start_state2;
break;
case '[':
- env->stored_state = conditional_state1;
- env->state = ws_state;
+ env->nesting_level += 1;
+ env->state = conditional_ws_state;
break;
case 'E':
env->state = elementdecl_or_entitydecl_state;
Modified: trunk/faxpp/src/elementdecl.c
===================================================================
--- trunk/faxpp/src/elementdecl.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/elementdecl.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -24,6 +24,7 @@
switch(env->current_char) {
case 'L':
+ env->nesting_level += 1;
env->state = elementdecl_initial_state1;
next_char(env);
break;
@@ -66,9 +67,34 @@
SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL)
SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL)
-SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL)
+SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state, INVALID_DTD_DECL)
FAXPP_Error
+elementdecl_name_ws_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ break;
+ case '%':
+ // TBD only for external subset - jpcs
+ store_state(env);
+ env->state = parameter_entity_reference_in_markup_state;
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ default:
+ env->state = elementdecl_name_state1;
+ token_start_position(env);
+ // No next_char
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
elementdecl_name_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
@@ -624,6 +650,7 @@
switch(env->current_char) {
case '>':
+ env->nesting_level -= 1;
base_state(env);
report_empty_token(ELEMENTDECL_END_TOKEN, env);
break;
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/error.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -133,6 +133,8 @@
return "INVALID_ELEMENTDECL_CONTENT";
case INVALID_CONDITIONAL_SECTION:
return "INVALID_CONDITIONAL_SECTION";
+ case IMPROPER_NESTING_OF_ENTITY:
+ return "IMPROPER_NESTING_OF_ENTITY";
case NO_ERROR:
break;
}
Modified: trunk/faxpp/src/reference.c
===================================================================
--- trunk/faxpp/src/reference.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/reference.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -359,6 +359,33 @@
}
FAXPP_Error
+parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env)
+{
+ while(1) {
+ read_char(env);
+
+ switch(env->current_char) {
+ LINE_ENDINGS
+ break;
+ case ';':
+ retrieve_state(env);
+ token_end_position(env);
+ report_token(PE_REFERENCE_IN_MARKUP_TOKEN, env);
+ next_char(env);
+ token_start_position(env);
+ return NO_ERROR;
+ }
+
+ next_char(env);
+ if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0)
+ return INVALID_CHAR_IN_ENTITY_REFERENCE;
+ }
+
+ // Never happens
+ return NO_ERROR;
+}
+
+FAXPP_Error
char_reference_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/token.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -54,6 +54,8 @@
return "ENTITY_REFERENCE_TOKEN";
case PE_REFERENCE_TOKEN:
return "PE_REFERENCE_TOKEN";
+ case PE_REFERENCE_IN_MARKUP_TOKEN:
+ return "PE_REFERENCE_IN_MARKUP_TOKEN";
case DEC_CHAR_REFERENCE_TOKEN:
return "DEC_CHAR_REFERENCE_TOKEN";
case HEX_CHAR_REFERENCE_TOKEN:
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/tokenizer_states.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -256,6 +256,8 @@
return "entity_reference_state";
else if(state == parameter_entity_reference_state)
return "parameter_entity_reference_state";
+ else if(state == parameter_entity_reference_in_markup_state)
+ return "parameter_entity_reference_in_markup_state";
else if(state == char_reference_state)
return "char_reference_state";
else if(state == dec_char_reference_state)
@@ -498,6 +500,8 @@
return "elementdecl_initial_state4";
else if(state == elementdecl_initial_state5)
return "elementdecl_initial_state5";
+ else if(state == elementdecl_name_ws_state)
+ return "elementdecl_name_ws_state";
else if(state == elementdecl_name_state1)
return "elementdecl_name_state1";
else if(state == elementdecl_name_state2)
@@ -793,6 +797,8 @@
else if(state == paramentitydecl_end_state)
return "paramentitydecl_end_state";
+ else if(state == conditional_ws_state)
+ return "conditional_ws_state";
else if(state == conditional_state1)
return "conditional_state1";
else if(state == conditional_state2)
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/tokenizer_states.h 2008-03-15 10:59:42 UTC (rev 44)
@@ -172,6 +172,7 @@
FAXPP_Error quot_entity_reference_state4(FAXPP_TokenizerEnv *env);
FAXPP_Error entity_reference_state(FAXPP_TokenizerEnv *env);
FAXPP_Error parameter_entity_reference_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env);
FAXPP_Error char_reference_state(FAXPP_TokenizerEnv *env);
FAXPP_Error dec_char_reference_state(FAXPP_TokenizerEnv *env);
FAXPP_Error hex_char_reference_state1(FAXPP_TokenizerEnv *env);
@@ -299,6 +300,7 @@
FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error elementdecl_name_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env);
@@ -448,6 +450,7 @@
FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env);
FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error conditional_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env);
FAXPP_Error ignore_state1(FAXPP_TokenizerEnv *env);
@@ -489,8 +492,8 @@
return BAD_ENCODING; \
} \
\
-/* printf("%03d:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \
-/* FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \
+/* printf("%03d:%03d Tok:%p L:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \
+/* (env), (env)->nesting_level, FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \
/* (env)->current_char); */ \
}
@@ -560,6 +563,8 @@
(env)->state = final_state; \
else if((env)->internal_subset) \
(env)->state = internal_subset_state; \
+ else if((env)->in_markup_entity) \
+ (env)->state = (env)->prev->state; \
else (env)->state = initial_misc_state; \
}
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/xml_parser.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -1069,7 +1069,7 @@
case XML_DECL_END_TOKEN:
env->next_event = nc_unsupported_encoding_next_event;
- if(env->tenv->external_subset) {
+ if(env->tenv->external_subset || env->tenv->in_markup_entity) {
// TBD event for start of external subset - jpcs
next = nc_dtd_next_event;
}
@@ -1131,7 +1131,7 @@
default:
env->tenv->buffered_token = 1;
p_reset_event(env);
- if(env->tenv->external_subset) {
+ if(env->tenv->external_subset || env->tenv->in_markup_entity) {
// TBD event for start of external subset - jpcs
env->next_event = nc_dtd_next_event;
}
@@ -1201,14 +1201,6 @@
#define p_compare_text(a, b) (((a)->len == (b)->len) ? memcmp((a)->ptr, (b)->ptr, (a)->len) : ((a)->len - (b)->len))
-/* static int p_compare_text(const FAXPP_Text *a, const FAXPP_Text *b) */
-/* { */
-/* int cmp = a->len - b->len; */
-/* if(cmp != 0) return cmp; */
-
-/* return memcmp(a->ptr, b->ptr, a->len); */
-/* } */
-
static FAXPP_EntityInfo *p_find_entity_info(const FAXPP_Text *name, FAXPP_EntityInfo *list)
{
while(list) {
@@ -1238,9 +1230,6 @@
env->tenv->line = entv->line;
env->tenv->column = entv->column;
- if(state == EXTERNAL_PARSED_ENTITY)
- env->next_event = nc_start_document_next_event;
-
// Set the entity on the first new tokenizer
if(*initial_entity) {
env->tenv->start_of_entity = 1;
@@ -1282,8 +1271,13 @@
return err;
}
+static const char single_space[] = {' '};
+
static FAXPP_Error p_parse_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state)
{
+ FAXPP_Error err;
+ FAXPP_EntityInfo *tmp;
+
// Check for a recursive entity
FAXPP_TokenizerEnv *tokenizer = env->tenv;
while(tokenizer) {
@@ -1293,18 +1287,47 @@
tokenizer = tokenizer->prev;
}
+ if(state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY) {
+ // Add a space after the entity inside DTD markup
+ err = FAXPP_push_entity_tokenizer(&env->tenv, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1);
+ if(err) return err;
+
+ env->tenv->line = ent->line;
+ env->tenv->column = ent->column;
+
+ FAXPP_set_tokenizer_decode(env->tenv, FAXPP_utf8_decode);
+ }
+
if(ent->external) {
switch(state) {
case ELEMENT_CONTENT_ENTITY: state = EXTERNAL_PARSED_ENTITY; break;
case INTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break;
case EXTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break;
+ case IN_MARKUP_ENTITY: state = EXTERNAL_IN_MARKUP_ENTITY; break;
default: break;
}
- return p_parse_external_entity(env, ent, state);
+ err = p_parse_external_entity(env, ent, state);
+ if(err) return err;
}
+ else {
+ tmp = ent;
+ err = p_parse_internal_entity(env, ent, state, &tmp);
+ if(err) return err;
- return p_parse_internal_entity(env, ent, state, &ent);
+ if(state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY) {
+ // Add a space before the entity inside DTD markup
+ err = FAXPP_push_entity_tokenizer(&env->tenv, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1);
+ if(err) return err;
+
+ env->tenv->line = ent->line;
+ env->tenv->column = ent->column;
+
+ FAXPP_set_tokenizer_decode(env->tenv, FAXPP_utf8_decode);
+ }
+ }
+
+ return NO_ERROR;
}
static Char32 p_dec_char_ref_value(const FAXPP_Text *text, FAXPP_ParserEnv *env)
@@ -1563,7 +1586,27 @@
if(err) goto error;
}
break;
+ case PE_REFERENCE_IN_MARKUP_TOKEN:
+ // Parameter entity references cannot be forward references -
+ // so we go ahead and look them up straight away
+ ent = p_find_entity_info(&env->tenv->result_token.value, env->parameter_entities);
+ // [VC: Entity Declared]
+ if(ent == 0) {
+ err = UNDEFINED_ENTITY;
+ goto error;
+ }
+ p_set_text_from_text(&bkup_system, &env->event.system_id);
+ p_set_text_from_text(&bkup_public, &env->event.public_id);
+
+ err = p_parse_entity(env, ent, IN_MARKUP_ENTITY);
+
+ p_set_text_from_text(&env->event.system_id, &bkup_system);
+ p_set_text_from_text(&env->event.public_id, &bkup_public);
+
+ if(err) goto error;
+ break;
+
case ELEMENTDECL_LPAR_TOKEN:
cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec));
memset(cs, 0, sizeof(FAXPP_ContentSpec));
@@ -1699,6 +1742,7 @@
case PUBID_LITERAL_TOKEN:
case NDATA_NAME_TOKEN:
case PE_REFERENCE_TOKEN:
+ case PE_REFERENCE_IN_MARKUP_TOKEN:
case ELEMENTDECL_PREFIX_TOKEN:
case ELEMENTDECL_NAME_TOKEN:
case ELEMENTDECL_EMPTY_TOKEN:
Modified: trunk/faxpp/src/xml_tokenizer.c
===================================================================
--- trunk/faxpp/src/xml_tokenizer.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/xml_tokenizer.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -392,6 +392,7 @@
env->nesting_level = 0;
env->elemdecl_content_level = 0;
+ env->ignore_start_level = 0;
env->do_encode = 1;
env->seen_doctype = 0;
@@ -403,6 +404,7 @@
env->internal_dtd_entity = 0;
env->external_dtd_entity = 0;
env->external_parsed_entity = 0;
+ env->in_markup_entity = 0;
env->start_of_entity = 0;
env->start_of_file = 0;
@@ -470,6 +472,7 @@
env->external_dtd_entity = state == EXTERNAL_DTD_ENTITY;
env->external_parsed_entity = state == EXTERNAL_PARSED_ENTITY;
env->external_subset = state == EXTERNAL_SUBSET_ENTITY;
+ env->in_markup_entity = state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY;
FAXPP_set_tokenizer_decode(env, env->prev->transcoder.decode);
@@ -491,8 +494,12 @@
case EXTERNAL_DTD_ENTITY:
env->state = external_subset_state;
break;
+ case IN_MARKUP_ENTITY:
+ env->state = env->prev->state;
+ break;
case EXTERNAL_PARSED_ENTITY:
case EXTERNAL_SUBSET_ENTITY:
+ case EXTERNAL_IN_MARKUP_ENTITY:
env->state = initial_state;
break;
}
@@ -515,7 +522,11 @@
*list = env->prev;
if(env->start_of_entity) {
- if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 ||
+ if(env->in_markup_entity) {
+ if(env->nesting_level != 0 || env->elemdecl_content_level != 0)
+ err = IMPROPER_NESTING_OF_ENTITY;
+ }
+ else if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 ||
(env->element_entity && env->state != parsed_entity_state &&
env->state != default_element_content_rsquare_state1 &&
env->state != default_element_content_rsquare_state2) ||
@@ -527,7 +538,8 @@
err = INCOMPLETE_MARKUP_IN_ENTITY_VALUE;
}
}
- else {
+
+ if(env->in_markup_entity || !env->start_of_entity) {
// Force the old tokenizer token to point into the token buffer
FAXPP_tokenizer_release_buffer(env, 0);
Modified: trunk/faxpp/src/xml_tokenizer.h
===================================================================
--- trunk/faxpp/src/xml_tokenizer.h 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/xml_tokenizer.h 2008-03-15 10:59:42 UTC (rev 44)
@@ -52,6 +52,7 @@
unsigned int nesting_level;
unsigned int elemdecl_content_level;
+ unsigned int ignore_start_level;
unsigned int do_encode:1;
unsigned int buffer_done:1;
@@ -64,6 +65,7 @@
unsigned int internal_dtd_entity:1;
unsigned int external_dtd_entity:1;
unsigned int external_parsed_entity:1;
+ unsigned int in_markup_entity:1;
unsigned int normalize_attrs:1;
unsigned int user_provided_decode:1;
@@ -99,14 +101,16 @@
struct FAXPP_TokenizerEnv_s *prev;
};
-// The first two values are the same as the values in FAXPP_EntityType
+// The first three values are the same as the values in FAXPP_EntityType
typedef enum {
EXTERNAL_PARSED_ENTITY2 = EXTERNAL_PARSED_ENTITY,
EXTERNAL_SUBSET_ENTITY2 = EXTERNAL_SUBSET_ENTITY,
+ EXTERNAL_IN_MARKUP_ENTITY2 = EXTERNAL_IN_MARKUP_ENTITY,
ELEMENT_CONTENT_ENTITY,
INTERNAL_DTD_ENTITY,
EXTERNAL_DTD_ENTITY,
+ IN_MARKUP_ENTITY,
ATTRIBUTE_VALUE_ENTITY
} FAXPP_EntityParseState;
Modified: trunk/faxpp/src/xmldecl.c
===================================================================
--- trunk/faxpp/src/xmldecl.c 2008-03-14 15:24:54 UTC (rev 43)
+++ trunk/faxpp/src/xmldecl.c 2008-03-15 10:59:42 UTC (rev 44)
@@ -16,6 +16,7 @@
#include "tokenizer_states.h"
#include "char_classes.h"
+#include "xml_parser.h"
FAXPP_Error
xml_decl_or_markup_state(FAXPP_TokenizerEnv *env)
@@ -29,7 +30,10 @@
token_start_position(env);
break;
case '!':
- if(env->external_subset)
+ // TBD Do this in all other places where it's not an XMLDecl - jpcs
+ if(env->in_markup_entity)
+ return INVALID_DTD_DECL; // TBD is this right? - jpcs
+ else if(env->external_subset)
env->state = external_subset_markup_state;
else
env->state = initial_markup_state;
@@ -164,7 +168,7 @@
env->state = xml_decl_version_state2;
break;
case 'e':
- if(env->external_parsed_entity || env->external_subset) {
+ if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) {
env->state = xml_decl_encoding_state2;
break;
}
@@ -330,13 +334,13 @@
WHITESPACE:
break;
case '?':
- if(env->external_parsed_entity || env->external_subset) goto invalid_char;
+ if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) goto invalid_char;
env->state = xml_decl_seen_question_state;
token_start_position(env);
break;
case 's':
- if(env->external_parsed_entity || env->external_subset) goto invalid_char;
+ if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) goto invalid_char;
env->state = xml_decl_standalone_state2;
break;
@@ -524,7 +528,7 @@
next_char(env);
break;
case 's':
- if(!env->external_parsed_entity && !env->external_subset) {
+ if(!env->external_parsed_entity && !env->external_subset && !env->in_markup_entity) {
env->state = xml_decl_standalone_state2;
next_char(env);
break;
@@ -698,17 +702,42 @@
return NO_ERROR;
}
+static const char single_space[] = {' '};
+
FAXPP_Error
xml_decl_seen_question_state(FAXPP_TokenizerEnv *env)
{
+ FAXPP_Error err;
+ FAXPP_TokenizerEnv *tok;
+
read_char(env);
switch(env->current_char) {
case '>':
base_state(env);
+
report_empty_token(XML_DECL_END_TOKEN, env);
next_char(env);
token_start_position(env);
+
+ if(env->in_markup_entity) {
+ // Add a space before the entity inside DTD markup
+ err = FAXPP_push_entity_tokenizer(&env, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1);
+ if(err) return err;
+
+ tok = env;
+ while(tok && tok->entity == 0) {
+ tok = tok->prev;
+ }
+
+ if(tok) {
+ env->line = tok->entity->line;
+ env->column = tok->entity->column;
+ }
+
+ FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode);
+ }
+
break;
LINE_ENDINGS
default:
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.