[Faxpp-devel] SF.net SVN: faxpp: [41] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
From: <jp...@us...> - 2008-03-11 17:30:01
|
Revision: 41 http://faxpp.svn.sourceforge.net/faxpp/?rev=41&view=rev Author: jpcs Date: 2008-03-11 10:28:12 -0700 (Tue, 11 Mar 2008) Log Message: ----------- Correctly tokenize notation declarations and attlist declarations. Moved a test out of the inner loop in *element_content_state. Modified Paths: -------------- trunk/faxpp/TODO trunk/faxpp/examples/entity_resolver.c trunk/faxpp/examples/output_event.h trunk/faxpp/examples/parser_example.c trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/attr_states.h trunk/faxpp/src/doctype.c trunk/faxpp/src/element_states.h trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/TODO 2008-03-11 17:28:12 UTC (rev 41) @@ -8,11 +8,10 @@ xml:space value checking Error for redefining "xml" namespace Error for defining "xmlns" namespace +Parse element decls correctly +Parse parameter entities in markup correctly Large tasks ----------- -Entity expansion framework -DTD internal subset parsing -DTD external subset parsing DTD validation Modified: trunk/faxpp/examples/entity_resolver.c =================================================================== --- trunk/faxpp/examples/entity_resolver.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/entity_resolver.c 2008-03-11 17:28:12 UTC (rev 41) @@ -60,7 +60,7 @@ file = fopen(path, "r"); if(file == 0) { - printf("Open of '%s' failed: %s\n", path, strerror(errno)); +/* printf("Open of '%s' failed: %s\n", path, strerror(errno)); */ return CANT_LOCATE_EXTERNAL_ENTITY; } Modified: trunk/faxpp/examples/output_event.h =================================================================== --- trunk/faxpp/examples/output_event.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/output_event.h 2008-03-11 17:28:12 UTC (rev 41) @@ -23,5 +23,6 @@ void output_event(const FAXPP_Event *event, FILE *stream); void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream); +void output_text(const FAXPP_Text *text, FILE *stream); #endif Modified: trunk/faxpp/examples/parser_example.c =================================================================== --- trunk/faxpp/examples/parser_example.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/parser_example.c 2008-03-11 17:28:12 UTC (rev 41) @@ -76,14 +76,15 @@ } while((err = FAXPP_next_event(parser)) == 0) { - output_event(FAXPP_get_current_event(parser), stdout); +/* output_event(FAXPP_get_current_event(parser), stdout); */ if(FAXPP_get_current_event(parser)->type == END_DOCUMENT_EVENT) break; } if(err != NO_ERROR) { - printf("%03d:%03d ERROR: %s\n", FAXPP_get_error_line(parser), + output_text(FAXPP_get_base_uri(parser), stdout); + printf(":%d:%d ERROR: %s\n", FAXPP_get_error_line(parser), FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/include/faxpp/error.h 2008-03-11 17:28:12 UTC (rev 41) @@ -65,6 +65,8 @@ ILLEGAL_PARAMETER_ENTITY, CANT_LOCATE_EXTERNAL_ENTITY, DONT_PARSE_EXTERNAL_ENTITY, + INVALID_ATTRIBUTE_TYPE, + INVALID_DEFAULTDECL, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/include/faxpp/token.h 2008-03-11 17:28:12 UTC (rev 41) @@ -79,10 +79,14 @@ ATTLISTDECL_PREFIX_TOKEN, ATTLISTDECL_NAME_TOKEN, - ATTLISTDECL_CONTENT_TOKEN, + ATTLISTDECL_ATTDEF_PREFIX_TOKEN, + ATTLISTDECL_ATTDEF_NAME_TOKEN, + ATTLISTDECL_NOTATION_NAME_TOKEN, + ATTLISTDECL_ENUMERATION_NAME_TOKEN, + ATTLISTDECL_END_TOKEN, NOTATIONDECL_NAME_TOKEN, - NOTATIONDECL_CONTENT_TOKEN, + NOTATIONDECL_END_TOKEN, ENTITYDECL_NAME_TOKEN, ENTITYDECL_VALUE_TOKEN, Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/attlistdecl.c 2008-03-11 17:28:12 UTC (rev 41) @@ -70,18 +70,16 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_content_state; - env->state = ws_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); next_char(env); return NO_ERROR; case '>': - env->state = attlistdecl_content_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); // no next char - token_start_position(env); return NO_ERROR; case ':': env->state = attlistdecl_name_seen_colon_state1; @@ -129,18 +127,16 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_content_state; - env->state = ws_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); next_char(env); return NO_ERROR; case '>': - env->state = attlistdecl_content_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); // no next char - token_start_position(env); return NO_ERROR; default: break; @@ -156,21 +152,704 @@ } FAXPP_Error -attlistdecl_content_state(FAXPP_TokenizerEnv *env) +attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { + WHITESPACE: + break; case '>': base_state(env); + report_empty_token(ATTLISTDECL_END_TOKEN, env); + break; + default: + env->state = attlistdecl_attdef_name_state2; + token_start_position(env); + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + return NO_ERROR; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case ':': + env->state = attlistdecl_attdef_name_seen_colon_state1; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_PREFIX_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = attlistdecl_attdef_name_seen_colon_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +/* [54] AttType ::= StringType | TokenizedType | EnumeratedType */ +/* [55] StringType ::= 'CDATA' */ +/* [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] */ +/* | 'IDREF' [VC: IDREF] */ +/* | 'IDREFS' [VC: IDREF] */ +/* | 'ENTITY' [VC: Entity Name] */ +/* | 'ENTITIES' [VC: Entity Name] */ +/* | 'NMTOKEN' [VC: Name Token] */ +/* | 'NMTOKENS' [VC: Name Token] */ + +/* [57] EnumeratedType ::= NotationType | Enumeration */ +/* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes] */ +/* [VC: One Notation Per Element Type] */ +/* [VC: No Notation on Empty Element] */ +/* [VC: No Duplicate Tokens] */ +/* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] */ +/* [VC: No Duplicate Tokens] */ +FAXPP_Error +attlistdecl_atttype_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'C': + env->state = attlistdecl_atttype_cdata_state1; + break; + case 'I': + env->state = attlistdecl_atttype_id_state1; + break; + case 'E': + env->state = attlistdecl_atttype_entity_state1; + break; + case 'N': + env->state = attlistdecl_atttype_nmtoken_state1; + break; + case '(': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state1, 'N', 0, attlistdecl_atttype_entity_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state2, 'T', 0, attlistdecl_atttype_entity_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state3, 'I', 0, attlistdecl_atttype_entity_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state4, 'T', 0, attlistdecl_atttype_entity_state5, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_entity_state5(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'Y': + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'I': + env->state = attlistdecl_atttype_entities_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'O': + env->state = attlistdecl_atttype_notation_state1; + break; + case 'M': + env->state = attlistdecl_atttype_nmtoken_state2; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state2, 'T', 0, attlistdecl_atttype_nmtoken_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state3, 'O', 0, attlistdecl_atttype_nmtoken_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state4, 'K', 0, attlistdecl_atttype_nmtoken_state5, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state5, 'E', 0, attlistdecl_atttype_nmtoken_state6, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state6, 'N', 0, attlistdecl_atttype_nmtoken_state7, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_nmtoken_state7(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'S': + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state1, 'T', 0, attlistdecl_atttype_notation_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state2, 'A', 0, attlistdecl_atttype_notation_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state4, 'I', 0, attlistdecl_atttype_notation_state5, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state5, 'O', 0, attlistdecl_atttype_notation_state6, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', attlistdecl_atttype_notation_state7, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '(': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = attlistdecl_atttype_notation_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_notation_separator_state; + env->state = ws_state; token_end_position(env); - report_token(ATTLISTDECL_CONTENT_TOKEN, env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); break; + case '|': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + token_end_position(env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); + break; + default: + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + return NO_ERROR; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '|': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_id_state1, 'D', 0, attlistdecl_atttype_id_state2, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_id_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'R': + env->state = attlistdecl_atttype_idref_state1; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state1, 'E', 0, attlistdecl_atttype_idref_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state2, 'F', 0, attlistdecl_atttype_idref_state3, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_idref_state3(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'S': + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { LINE_ENDINGS default: + env->state = attlistdecl_atttype_enumeration_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; break; } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_enumeration_separator_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + case '|': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + default: + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + return NO_ERROR; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '|': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +/* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' */ +/* | (('#FIXED' S)? AttValue) [VC: Required Attribute] */ +/* [VC: Attribute Default Value Syntactically Correct] */ +/* [WFC: No < in Attribute Values] */ +/* [VC: Fixed Attribute Default] */ +FAXPP_Error +attlistdecl_default_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '#': + env->state = attlistdecl_default_state2; + next_char(env); + return NO_ERROR; + case '\'': + env->state = attlistdecl_attvalue_apos_state; + break; + case '"': + env->state = attlistdecl_attvalue_quot_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_default_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'R': + env->state = attlistdecl_default_required_state1; + break; + case 'I': + env->state = attlistdecl_default_implied_state1; + break; + case 'F': + env->state = attlistdecl_default_fixed_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_default_implied_state1, 'M', 0, attlistdecl_default_implied_state2, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state2, 'P', 0, attlistdecl_default_implied_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) + +SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state3, 'U', 0, attlistdecl_default_required_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) + +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state3, 'E', 0, attlistdecl_default_fixed_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL) + +FAXPP_Error +attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = attlistdecl_attvalue_apos_state; + break; + case '"': + env->state = attlistdecl_attvalue_quot_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attvalue_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ATTRIBUTE_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = attlistdecl_attdef_name_state1; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + next_char(env); + return INVALID_CHAR_IN_ATTRIBUTE; + LINE_ENDINGS + case '\t': + if(env->normalize_attrs) { + // Move the token to the buffer, to normalize it + FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0); + if(err != NO_ERROR) return err; + env->current_char = ' '; + } + break; + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens return NO_ERROR; } +FAXPP_Error +attlistdecl_attvalue_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ATTRIBUTE_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = attlistdecl_attdef_name_state1; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + next_char(env); + return INVALID_CHAR_IN_ATTRIBUTE; + LINE_ENDINGS + case '\t': { + if(env->normalize_attrs) { + // Move the token to the buffer, to normalize it + FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0); + if(err != NO_ERROR) return err; + env->current_char = ' '; + } + break; + } + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + + Modified: trunk/faxpp/src/attr_states.h =================================================================== --- trunk/faxpp/src/attr_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/attr_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -248,7 +248,7 @@ PREFIX(attr_value_apos_state)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -309,7 +309,7 @@ PREFIX(attr_value_quot_state)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -373,7 +373,7 @@ PREFIX(attr_value_state_en)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/doctype.c 2008-03-11 17:28:12 UTC (rev 41) @@ -381,7 +381,7 @@ switch(env->current_char) { case '\'': - env->state = system_id_ws_state; + env->state = public_id_ws_state2; token_end_position(env); report_token(PUBID_LITERAL_TOKEN, env); next_char(env); @@ -425,7 +425,7 @@ switch(env->current_char) { case '"': - env->state = system_id_ws_state; + env->state = public_id_ws_state2; token_end_position(env); report_token(PUBID_LITERAL_TOKEN, env); next_char(env); @@ -462,6 +462,53 @@ } FAXPP_Error +public_id_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = public_id_ws_state3; + next_char(env); + break; + case '>': + if(env->stored_state == notationdecl_end_state) { + // Notation decls can skip the system literal + retrieve_state(env); + return NO_ERROR; + } + // Fall through + default: + env->state = system_literal_start_state; + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +public_id_ws_state3(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + if(env->stored_state == notationdecl_end_state) { + // Notation decls can skip the system literal + retrieve_state(env); + return NO_ERROR; + } + // Fall through + default: + env->state = system_literal_start_state; + break; + } + return NO_ERROR; +} + +FAXPP_Error doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/element_states.h =================================================================== --- trunk/faxpp/src/element_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/element_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -190,84 +190,158 @@ FAXPP_Error PREFIX(element_content_state)(FAXPP_TokenizerEnv *env) { - while(1) { - if(env->position >= env->buffer_end) { - if(env->token.value.ptr) { - token_end_position(env); - if(env->token.value.len != 0) { - report_token(CHARACTERS_TOKEN, env); - return NO_ERROR; + if((env)->token_buffer.cursor) { + + while(1) { + END_CHECK_IF { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(CHARACTERS_TOKEN, env); + return NO_ERROR; + } } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; } - token_start_position(env); - return PREMATURE_END_OF_BUFFER; + + READ_CHAR; + + switch(env->current_char) { + case '<': + env->state = PREFIX(element_content_markup_state); + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + goto next_char_no_error; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case ']': + env->state = PREFIX(element_content_rsquare_state1); + goto next_char_no_error; + LINE_ENDINGS_LABEL(0) + break; + + // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above + + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: + case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F: + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: + goto restricted_char_error; + + case 0x09: + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: + case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: + case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: + case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: + // The char is a valid one byte char + break; + + default: + DEFAULT_CASE; + + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + goto restricted_char_error; + break; + } + + next_char_append(env); + next_char_position(env); } - READ_CHAR; + } else { - switch(env->current_char) { - case '<': - env->state = PREFIX(element_content_markup_state); - token_end_position(env); - report_token(CHARACTERS_TOKEN, env); - next_char_no_error: - next_char(env); - return NO_ERROR; - case '&': - store_state(env); - env->state = reference_state; - token_end_position(env); - report_token(CHARACTERS_TOKEN, env); - next_char(env); - token_start_position(env); - return NO_ERROR; - case ']': - env->state = PREFIX(element_content_rsquare_state1); - goto next_char_no_error; - LINE_ENDINGS - break; + while(1) { + END_CHECK_IF { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(CHARACTERS_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } - // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above + READ_CHAR; - case 0x7F: - if(env->non_restricted_char == NON_RESTRICTED_CHAR11) { - case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: - case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F: - case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: - case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: - restricted_char_error: + switch(env->current_char) { + case '<': + env->state = PREFIX(element_content_markup_state); + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + goto next_char_no_error; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); next_char(env); - return RESTRICTED_CHAR; - } - break; - case 0x09: - case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: - case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: - case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: - case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: - case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: - case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: - // The char is a valid one byte char - break; + token_start_position(env); + return NO_ERROR; + case ']': + env->state = PREFIX(element_content_rsquare_state1); + goto next_char_no_error; + LINE_ENDINGS_LABEL(1) + break; - default: - DEFAULT_CASE; + // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: + case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F: + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: goto restricted_char_error; - break; + + case 0x09: + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: + case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: + case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: + case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: + // The char is a valid one byte char + break; + + default: + DEFAULT_CASE; + + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + goto restricted_char_error; + break; + } + + next_char_position(env); } - next_char(env); } - // Never happens +restricted_char_error: + next_char(env); + return RESTRICTED_CHAR; +next_char_no_error: + next_char(env); return NO_ERROR; } @@ -311,7 +385,7 @@ FAXPP_Error PREFIX(element_content_rsquare_state1)(FAXPP_TokenizerEnv *env) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -341,7 +415,7 @@ FAXPP_Error PREFIX(element_content_rsquare_state2)(FAXPP_TokenizerEnv *env) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/error.c 2008-03-11 17:28:12 UTC (rev 41) @@ -125,6 +125,10 @@ return "CANT_LOCATE_EXTERNAL_ENTITY"; case DONT_PARSE_EXTERNAL_ENTITY: return "DONT_PARSE_EXTERNAL_ENTITY"; + case INVALID_ATTRIBUTE_TYPE: + return "INVALID_ATTRIBUTE_TYPE"; + case INVALID_DEFAULTDECL: + return "INVALID_DEFAULTDECL"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/notationdecl.c 2008-03-11 17:28:12 UTC (rev 41) @@ -96,16 +96,42 @@ read_char(env); switch(env->current_char) { - case '>': - base_state(env); - token_end_position(env); - report_token(NOTATIONDECL_CONTENT_TOKEN, env); + case 'S': + env->stored_state = notationdecl_end_state; + env->state = system_id_initial_state1; break; + case 'P': + env->stored_state = notationdecl_end_state; + env->state = public_id_initial_state1; + break; LINE_ENDINGS default: - break; + next_char(env); + return INVALID_DTD_DECL; } next_char(env); return NO_ERROR; } +FAXPP_Error +notationdecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(NOTATIONDECL_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_DTD_DECL; + } + return NO_ERROR; +} + Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/token.c 2008-03-11 17:28:12 UTC (rev 41) @@ -110,13 +110,21 @@ return "ATTLISTDECL_PREFIX_TOKEN"; case ATTLISTDECL_NAME_TOKEN: return "ATTLISTDECL_NAME_TOKEN"; - case ATTLISTDECL_CONTENT_TOKEN: - return "ATTLISTDECL_CONTENT_TOKEN"; + case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: + return "ATTLISTDECL_ATTDEF_PREFIX_TOKEN"; + case ATTLISTDECL_ATTDEF_NAME_TOKEN: + return "ATTLISTDECL_ATTDEF_NAME_TOKEN"; + case ATTLISTDECL_NOTATION_NAME_TOKEN: + return "ATTLISTDECL_NOTATION_NAME_TOKEN"; + case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + return "ATTLISTDECL_ENUMERATION_NAME_TOKEN"; + case ATTLISTDECL_END_TOKEN: + return "ATTLISTDECL_END_TOKEN"; case NOTATIONDECL_NAME_TOKEN: return "NOTATIONDECL_NAME_TOKEN"; - case NOTATIONDECL_CONTENT_TOKEN: - return "NOTATIONDECL_CONTENT_TOKEN"; + case NOTATIONDECL_END_TOKEN: + return "NOTATIONDECL_END_TOKEN"; case ENTITYDECL_NAME_TOKEN: return "ENTITYDECL_NAME_TOKEN"; Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-11 17:28:12 UTC (rev 41) @@ -477,6 +477,10 @@ return "pubid_literal_apos_state"; else if(state == pubid_literal_quot_state) return "pubid_literal_quot_state"; + else if(state == public_id_ws_state2) + return "public_id_ws_state2"; + else if(state == public_id_ws_state3) + return "public_id_ws_state3"; else if(state == elementdecl_or_entitydecl_state) return "elementdecl_or_entitydecl_state"; @@ -523,9 +527,134 @@ return "attlistdecl_name_seen_colon_state1"; else if(state == attlistdecl_name_seen_colon_state2) return "attlistdecl_name_seen_colon_state2"; - else if(state == attlistdecl_content_state) - return "attlistdecl_content_state"; + else if(state == attlistdecl_attdef_name_state1) + return "attlistdecl_attdef_name_state1"; + else if(state == attlistdecl_attdef_name_state2) + return "attlistdecl_attdef_name_state2"; + else if(state == attlistdecl_attdef_name_seen_colon_state1) + return "attlistdecl_attdef_name_seen_colon_state1"; + else if(state == attlistdecl_attdef_name_seen_colon_state2) + return "attlistdecl_attdef_name_seen_colon_state2"; + else if(state == attlistdecl_atttype_state) + return "attlistdecl_atttype_state"; + else if(state == attlistdecl_atttype_cdata_state1) + return "attlistdecl_atttype_cdata_state1"; + else if(state == attlistdecl_atttype_cdata_state2) + return "attlistdecl_atttype_cdata_state2"; + else if(state == attlistdecl_atttype_cdata_state3) + return "attlistdecl_atttype_cdata_state3"; + else if(state == attlistdecl_atttype_cdata_state4) + return "attlistdecl_atttype_cdata_state4"; + else if(state == attlistdecl_atttype_id_state1) + return "attlistdecl_atttype_id_state1"; + else if(state == attlistdecl_atttype_id_state2) + return "attlistdecl_atttype_id_state2"; + else if(state == attlistdecl_atttype_idref_state1) + return "attlistdecl_atttype_idref_state1"; + else if(state == attlistdecl_atttype_idref_state2) + return "attlistdecl_atttype_idref_state2"; + else if(state == attlistdecl_atttype_idref_state3) + return "attlistdecl_atttype_idref_state3"; + else if(state == attlistdecl_atttype_entity_state1) + return "attlistdecl_atttype_entity_state1"; + else if(state == attlistdecl_atttype_entity_state2) + return "attlistdecl_atttype_entity_state2"; + else if(state == attlistdecl_atttype_entity_state3) + return "attlistdecl_atttype_entity_state3"; + else if(state == attlistdecl_atttype_entity_state4) + return "attlistdecl_atttype_entity_state4"; + else if(state == attlistdecl_atttype_entity_state5) + return "attlistdecl_atttype_entity_state5"; + else if(state == attlistdecl_atttype_entities_state1) + return "attlistdecl_atttype_entities_state1"; + else if(state == attlistdecl_atttype_entities_state2) + return "attlistdecl_atttype_entities_state2"; + else if(state == attlistdecl_atttype_nmtoken_state1) + return "attlistdecl_atttype_nmtoken_state1"; + else if(state == attlistdecl_atttype_nmtoken_state2) + return "attlistdecl_atttype_nmtoken_state2"; + else if(state == attlistdecl_atttype_nmtoken_state3) + return "attlistdecl_atttype_nmtoken_state3"; + else if(state == attlistdecl_atttype_nmtoken_state4) + return "attlistdecl_atttype_nmtoken_state4"; + else if(state == attlistdecl_atttype_nmtoken_state5) + return "attlistdecl_atttype_nmtoken_state5"; + else if(state == attlistdecl_atttype_nmtoken_state6) + return "attlistdecl_atttype_nmtoken_state6"; + else if(state == attlistdecl_atttype_nmtoken_state7) + return "attlistdecl_atttype_nmtoken_state7"; + else if(state == attlistdecl_atttype_notation_state1) + return "attlistdecl_atttype_notation_state1"; + else if(state == attlistdecl_atttype_notation_state2) + return "attlistdecl_atttype_notation_state2"; + else if(state == attlistdecl_atttype_notation_state3) + return "attlistdecl_atttype_notation_state3"; + else if(state == attlistdecl_atttype_notation_state4) + return "attlistdecl_atttype_notation_state4"; + else if(state == attlistdecl_atttype_notation_state5) + return "attlistdecl_atttype_notation_state5"; + else if(state == attlistdecl_atttype_notation_state6) + return "attlistdecl_atttype_notation_state6"; + else if(state == attlistdecl_atttype_notation_state7) + return "attlistdecl_atttype_notation_state7"; + else if(state == attlistdecl_atttype_notation_name_state1) + return "attlistdecl_atttype_notation_name_state1"; + else if(state == attlistdecl_atttype_notation_name_state2) + return "attlistdecl_atttype_notation_name_state2"; + else if(state == attlistdecl_atttype_notation_separator_state) + return "attlistdecl_atttype_notation_separator_state"; + else if(state == attlistdecl_atttype_enumeration_name_state1) + return "attlistdecl_atttype_enumeration_name_state1"; + else if(state == attlistdecl_atttype_enumeration_name_state2) + return "attlistdecl_atttype_enumeration_name_state2"; + else if(state == attlistdecl_atttype_enumeration_separator_state) + return "attlistdecl_atttype_enumeration_separator_state"; + else if(state == attlistdecl_default_state1) + return "attlistdecl_default_state1"; + else if(state == attlistdecl_default_state2) + return "attlistdecl_default_state2"; + else if(state == attlistdecl_default_implied_state1) + return "attlistdecl_default_implied_state1"; + else if(state == attlistdecl_default_implied_state2) + return "attlistdecl_default_implied_state2"; + else if(state == attlistdecl_default_implied_state3) + return "attlistdecl_default_implied_state3"; + else if(state == attlistdecl_default_implied_state4) + return "attlistdecl_default_implied_state4"; + else if(state == attlistdecl_default_implied_state5) + return "attlistdecl_default_implied_state5"; + else if(state == attlistdecl_default_implied_state6) + return "attlistdecl_default_implied_state6"; + else if(state == attlistdecl_default_required_state1) + return "attlistdecl_default_required_state1"; + else if(state == attlistdecl_default_required_state2) + return "attlistdecl_default_required_state2"; + else if(state == attlistdecl_default_required_state3) + return "attlistdecl_default_required_state3"; + else if(state == attlistdecl_default_required_state4) + return "attlistdecl_default_required_state4"; + else if(state == attlistdecl_default_required_state5) + return "attlistdecl_default_required_state5"; + else if(state == attlistdecl_default_required_state6) + return "attlistdecl_default_required_state6"; + else if(state == attlistdecl_default_required_state7) + return "attlistdecl_default_required_state7"; + else if(state == attlistdecl_default_fixed_state1) + return "attlistdecl_default_fixed_state1"; + else if(state == attlistdecl_default_fixed_state2) + return "attlistdecl_default_fixed_state2"; + else if(state == attlistdecl_default_fixed_state3) + return "attlistdecl_default_fixed_state3"; + else if(state == attlistdecl_default_fixed_state4) + return "attlistdecl_default_fixed_state4"; + else if(state == attlistdecl_attvalue_start_state) + return "attlistdecl_attvalue_start_state"; + else if(state == attlistdecl_attvalue_apos_state) + return "attlistdecl_attvalue_apos_state"; + else if(state == attlistdecl_attvalue_quot_state) + return "attlistdecl_attvalue_quot_state"; + else if(state == notationdecl_initial_state1) return "notationdecl_initial_state1"; else if(state == notationdecl_initial_state1) @@ -548,6 +677,8 @@ return "notationdecl_name_state2"; else if(state == notationdecl_content_state) return "notationdecl_content_state"; + else if(state == notationdecl_end_state) + return "notationdecl_end_state"; else if(state == entitydecl_initial_state1) return "entitydecl_initial_state1"; @@ -878,9 +1009,8 @@ // Include the default states #define PREFIX(name) default_ ## name -#define END_CHECK \ - if((env)->position >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return PREMATURE_END_OF_BUFFER; } #define READ_CHAR read_char_no_check(env) #define DEFAULT_CASE (void)0 @@ -890,14 +1020,14 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX // Include the utf8 states #define PREFIX(name) utf8_ ## name -#define END_CHECK \ - if((env)->position >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return PREMATURE_END_OF_BUFFER; } #define READ_CHAR \ /* Assume it's a one byte character for now */ \ env->current_char = *(uint8_t*)env->position; \ @@ -923,14 +1053,14 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX // Include the utf16 states #define PREFIX(name) utf16_ ## name -#define END_CHECK \ - if((env)->position + 1 >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position + 1 >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return PREMATURE_END_OF_BUFFER; } #define READ_CHAR \ /* Assume it's not a surrogate pair for now */ \ env->current_char = *(uint16_t*)env->position; \ @@ -956,5 +1086,6 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -288,6 +288,8 @@ FAXPP_Error pubid_literal_start_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error public_id_ws_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); @@ -311,8 +313,72 @@ FAXPP_Error attlistdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error attlistdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_id_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_id_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entities_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entities_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error attlistdecl_default_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_quot_state(FAXPP_TokenizerEnv *env); + FAXPP_Error notationdecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_initial_state3(FAXPP_TokenizerEnv *env); @@ -323,6 +389,7 @@ FAXPP_Error notationdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); @@ -385,15 +452,25 @@ read_char_no_check((env)); \ } +#define next_char_append(env) \ +{ \ + FAXPP_Error err = FAXPP_buffer_append_ch(&(env)->token_buffer, (env)->transcoder.encode, (env)->current_char); \ + if(err != 0) return err; \ +} + +#define next_char_position(env) \ +{ \ + (env)->column += 1; \ + (env)->position += (env)->char_len; \ +} + #define next_char(env) \ { \ if((env)->token_buffer.cursor) { \ - FAXPP_Error err = FAXPP_buffer_append_ch(&(env)->token_buffer, (env)->transcoder.encode, (env)->current_char); \ - if(err != 0) return err; \ + next_char_append((env)); \ } \ \ - (env)->column += 1; \ - (env)->position += (env)->char_len; \ + next_char_position((env)); \ } #define token_start_position(env) \ @@ -475,24 +552,26 @@ (env)->stored_state = 0; \ } -#define LINE_ENDINGS \ +#define LINE_ENDINGS_LABEL(label) \ case '\r': { \ Char32 next_char; \ if((env)->position + (env)->char_len >= (env)->buffer_end || \ (env)->decode((env)->position + (env)->char_len, (env)->buffer_end, &next_char) \ == TRANSCODE_PREMATURE_END_OF_BUFFER) { \ if(!(env)->buffer_done) return PREMATURE_END_OF_BUFFER; \ - goto LINE_ENDINGS_INC; \ + goto LINE_ENDINGS_INC ## label; \ } \ - else if(next_char != '\n') goto LINE_ENDINGS_INC; \ - goto LINE_ENDINGS_END; \ + else if(next_char != '\n') goto LINE_ENDINGS_INC ## label; \ + goto LINE_ENDINGS_END ## label; \ } \ case '\n': \ -LINE_ENDINGS_INC: \ +LINE_ENDINGS_INC ## label: \ (env)->line += 1; \ (env)->column = (unsigned int)-1; \ -LINE_ENDINGS_END: +LINE_ENDINGS_END ## label: +#define LINE_ENDINGS LINE_ENDINGS_LABEL(0) + /* * [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/xml_parser.c 2008-03-11 17:28:12 UTC (rev 41) @@ -182,7 +182,10 @@ FAXPP_set_tokenizer_decode(parser->tenv, decode); parser->tenv->user_provided_decode = 1; if(parser->next_event == nc_unsupported_encoding_next_event) { - parser->next_event =... [truncated message content] |