faxpp-devel Mailing List for Fast XML Pull Parser
Status: Beta
Brought to you by:
jpcs
You can subscribe to this list here.
2007 | Jan | Feb | Mar | Apr | May | Jun | Jul (10) | Aug (22) | Sep (4) | Oct (1) | Nov | Dec |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2008 | Jan (1) | Feb (3) | Mar (14) | Apr | May (1) | Jun | Jul | Aug (1) | Sep | Oct | Nov | Dec (6) |
2011 | Jan | Feb | Mar | Apr (1) | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec |
From: <jp...@us...> - 2011-04-05 16:45:59
|
Revision: 59 http://faxpp.svn.sourceforge.net/faxpp/?rev=59&view=rev Author: jpcs Date: 2011-04-05 16:45:53 +0000 (Tue, 05 Apr 2011) Log Message: ----------- Fix DTD parsing so that entities pointing to external subsets in the internal subset don't stop the main external subset from being loaded. Modified Paths: -------------- trunk/faxpp/src/xml_parser.c Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-12-04 14:43:42 UTC (rev 58) +++ trunk/faxpp/src/xml_parser.c 2011-04-05 16:45:53 UTC (rev 59) @@ -627,6 +627,22 @@ /* static void p_print_token(FAXPP_ParserEnv *env) */ /* { */ /* char buf[BUF_SIZE + 1]; */ + +/* if(env->tenv->base_uri.ptr != 0) { */ +/* if(env->tenv->base_uri.len > BUF_SIZE) { */ +/* strncpy(buf + 3, env->tenv->base_uri.ptr + env->tenv->base_uri.len - BUF_SIZE + 3, BUF_SIZE - 3); */ +/* buf[0] = '.'; */ +/* buf[1] = '.'; */ +/* buf[2] = '.'; */ +/* buf[BUF_SIZE] = 0; */ +/* } */ +/* else { */ +/* strncpy(buf, env->tenv->base_uri.ptr, env->tenv->base_uri.len); */ +/* buf[env->tenv->result_token.value.len] = 0; */ +/* } */ +/* printf("%s", buf); */ +/* } */ + /* if(env->tenv->result_token.value.ptr != 0) { */ /* if(env->tenv->result_token.value.len > BUF_SIZE) { */ /* strncpy(buf, env->tenv->result_token.value.ptr, BUF_SIZE - 3); */ @@ -639,11 +655,11 @@ /* strncpy(buf, env->tenv->result_token.value.ptr, env->tenv->result_token.value.len); */ /* buf[env->tenv->result_token.value.len] = 0; */ /* } */ -/* printf("%03d:%03d Token ID: %s, Token: \"%s\"\n", env->tenv->result_token.line, */ +/* printf(":%03d:%03d Token ID: %s, Token: \"%s\"\n", env->tenv->result_token.line, */ /* env->tenv->result_token.column, FAXPP_token_to_string(env->tenv->result_token.type), buf); */ /* } */ /* else { */ -/* printf("%03d:%03d Token ID: %s\n", env->tenv->result_token.line, env->tenv->result_token.column, */ +/* printf(":%03d:%03d Token ID: %s\n", 
env->tenv->result_token.line, env->tenv->result_token.column, */ /* FAXPP_token_to_string(env->tenv->result_token.type)); */ /* } */ /* } */ @@ -659,7 +675,7 @@ (err) = (env)->tenv->state((env)->tenv); \ p_check_err((err), (env)); \ } \ -/* p_print_token(env); */ \ + /* p_print_token(env); */ \ } \ } @@ -978,7 +994,8 @@ env->event.type = END_EXTERNAL_ENTITY_EVENT; } else if(env->tenv->external_subset) { - if(env->tenv->prev->internal_subset) { + if(env->tenv->prev->internal_subset || + env->tenv->prev->external_subset) { env->next_event = nc_dtd_next_event; } else { @@ -1165,17 +1182,18 @@ return NO_ERROR; default: env->tenv->buffered_token = 1; - p_reset_event(env); if(env->tenv->external_subset || env->tenv->external_in_markup_entity) { // TBD event for start of external subset - jpcs env->next_event = nc_dtd_next_event; } else if(env->tenv->external_parsed_entity) { + p_reset_event(env); env->event.type = START_EXTERNAL_ENTITY_EVENT; env->next_event = env->main_next_event; } else { + p_reset_event(env); env->event.type = START_DOCUMENT_EVENT; env->next_event = env->main_next_event; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: John S. <joh...@or...> - 2008-12-04 15:11:53
|
Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return strings in any encoding, including UTF-8 and UTF-16. This release is a beta release, to allow users to test the parser and provide feedback on problems. This release fixes a number of bugs since the previous release. Faxpp 0.4 can be downloaded from here: http://sourceforge.net/project/showfiles.php?group_id=201903 Full API documentation can be found here: http://faxpp.sourceforge.net/ The full change log can be found here: http://sourceforge.net/project/shownotes.php?group_id=201903&release_id=644844 -- John Snelson, Oracle Corporation http://snelson.org.uk/john Berkeley DB XML: http://www.oracle.com/database/berkeley-db/xml XQilla: http://xqilla.sourceforge.net |
From: <jp...@us...> - 2008-12-04 14:43:50
|
Revision: 58 http://faxpp.svn.sourceforge.net/faxpp/?rev=58&view=rev Author: jpcs Date: 2008-12-04 14:43:42 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Branch for the 0.4 release. Added Paths: ----------- branch/0.4/ branch/0.4/ChangeLog Removed Paths: ------------- branch/0.4/ChangeLog Deleted: branch/0.4/ChangeLog =================================================================== --- trunk/faxpp/ChangeLog 2008-12-04 14:30:16 UTC (rev 55) +++ branch/0.4/ChangeLog 2008-12-04 14:43:42 UTC (rev 58) @@ -1,47 +0,0 @@ -Faxpp: A small, fast XML pull parser written in C with an API that can -return UTF-8 or UTF-16 strings. - -version 0.3 (2008.03.20): - * Implemented the parsing of doctype declarations and internal - subsets. - * Implemented the parsing of external subsets (DTDs) and - external parsed entities, and a mechanism for resolving them. - * Implemented entity resolution and replacement. - * Implemented a base URI for the parser, which is used to - resolve external entities. - * Changed FAXPP_set_decode() so that if the user sets a decode - function, all other indications of encoding are ignored. - * Added methods to return a decode or encode function, given a - string defining the encoding. - * Added line and column numbers to attribute values. - * Fixed a bug that was causing all strings to be copied. - * Fixed column counting, and line counting for "\r\n" at a - buffer boundary. - * Fixed the handling of namespace URIs for "xml" and "xmlns", - added raised the correct errors for redefining them. - * Fixed the parser to always return a START_DOCUMENT_EVENT. - * Fixed various conformance and memory related bugs. - -version 0.2 (2007.08.20): - * Implemented a test harness for the XML Conformance Test Suite - * Added support for streaming input to the tokenizer and parser, - without the need for a callback function. 
- * Implemented an encoding framework to allow the user to specify - implementation defined encodings based on a document's - declared encoding. - * Added built-in decoding support for ISO-8859-1 (Latin1). - * Implemented attribute value normalization, adding a parser - option to enable/disable it. - * Recognize built in entity references - raise an error - for any unknown entity. - * Implemented the XML 1.0 char classes as well as the XML 1.1 - ones, and switched between them depending on the XML version - declaration. - * Added tokens for the end of the XML declaration, and the built - in entity references. - * Check for restricted characters anywhere in the document. - * Check for illegal "]]>" in element content. - * Require whitespace between attributes. - * Fix handling of namespace 1.1 prefix undefines. - * Fixed a bug in entity output in the examples. - * Added a todo list. Copied: branch/0.4/ChangeLog (from rev 57, trunk/faxpp/ChangeLog) =================================================================== --- branch/0.4/ChangeLog (rev 0) +++ branch/0.4/ChangeLog 2008-12-04 14:43:42 UTC (rev 58) @@ -0,0 +1,56 @@ +Faxpp: A small, fast XML pull parser written in C with an API that can +return UTF-8 or UTF-16 strings. + +version 0.4 (2008.12.04): + * Added the examples to the documentation. + * Fixed a bug where parsing would not resume properly after + certain error cases. + * Fixed a freed memory read bug in attribute normalization. + * Fixed a bug in entity replacement when the output encoding is + not UTF-8. + * Fixed Windows build. + +version 0.3 (2008.03.20): + * Implemented the parsing of doctype declarations and internal + subsets. + * Implemented the parsing of external subsets (DTDs) and + external parsed entities, and a mechanism for resolving them. + * Implemented entity resolution and replacement. + * Implemented a base URI for the parser, which is used to + resolve external entities. 
+ * Changed FAXPP_set_decode() so that if the user sets a decode + function, all other indications of encoding are ignored. + * Added methods to return a decode or encode function, given a + string defining the encoding. + * Added line and column numbers to attribute values. + * Fixed a bug that was causing all strings to be copied. + * Fixed column counting, and line counting for "\r\n" at a + buffer boundary. + * Fixed the handling of namespace URIs for "xml" and "xmlns", + added raised the correct errors for redefining them. + * Fixed the parser to always return a START_DOCUMENT_EVENT. + * Fixed various conformance and memory related bugs. + +version 0.2 (2007.08.20): + * Implemented a test harness for the XML Conformance Test Suite + * Added support for streaming input to the tokenizer and parser, + without the need for a callback function. + * Implemented an encoding framework to allow the user to specify + implementation defined encodings based on a document's + declared encoding. + * Added built-in decoding support for ISO-8859-1 (Latin1). + * Implemented attribute value normalization, adding a parser + option to enable/disable it. + * Recognize built in entity references - raise an error + for any unknown entity. + * Implemented the XML 1.0 char classes as well as the XML 1.1 + ones, and switched between them depending on the XML version + declaration. + * Added tokens for the end of the XML declaration, and the built + in entity references. + * Check for restricted characters anywhere in the document. + * Check for illegal "]]>" in element content. + * Require whitespace between attributes. + * Fix handling of namespace 1.1 prefix undefines. + * Fixed a bug in entity output in the examples. + * Added a todo list. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-12-04 14:39:20
|
Revision: 57 http://faxpp.svn.sourceforge.net/faxpp/?rev=57&view=rev Author: jpcs Date: 2008-12-04 14:39:17 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Fixed formatting. Modified Paths: -------------- trunk/faxpp/ChangeLog Modified: trunk/faxpp/ChangeLog =================================================================== --- trunk/faxpp/ChangeLog 2008-12-04 14:37:57 UTC (rev 56) +++ trunk/faxpp/ChangeLog 2008-12-04 14:39:17 UTC (rev 57) @@ -3,11 +3,11 @@ version 0.4 (2008.12.04): * Added the examples to the documentation. - * Fixed a bug where parsing would not resume properly after certain - error cases. + * Fixed a bug where parsing would not resume properly after + certain error cases. * Fixed a freed memory read bug in attribute normalization. - * Fixed a bug in entity replacement when the output encoding is not - UTF-8. + * Fixed a bug in entity replacement when the output encoding is + not UTF-8. * Fixed Windows build. version 0.3 (2008.03.20): This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-12-04 14:38:03
|
Revision: 56 http://faxpp.svn.sourceforge.net/faxpp/?rev=56&view=rev Author: jpcs Date: 2008-12-04 14:37:57 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Updated the change log for the 0.4 release. Modified Paths: -------------- trunk/faxpp/ChangeLog Modified: trunk/faxpp/ChangeLog =================================================================== --- trunk/faxpp/ChangeLog 2008-12-04 14:30:16 UTC (rev 55) +++ trunk/faxpp/ChangeLog 2008-12-04 14:37:57 UTC (rev 56) @@ -1,6 +1,15 @@ Faxpp: A small, fast XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. +version 0.4 (2008.12.04): + * Added the examples to the documentation. + * Fixed a bug where parsing would not resume properly after certain + error cases. + * Fixed a freed memory read bug in attribute normalization. + * Fixed a bug in entity replacement when the output encoding is not + UTF-8. + * Fixed Windows build. + version 0.3 (2008.03.20): * Implemented the parsing of doctype declarations and internal subsets. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-12-04 14:30:20
|
Revision: 55 http://faxpp.svn.sourceforge.net/faxpp/?rev=55&view=rev Author: jpcs Date: 2008-12-04 14:30:16 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Bumped version numbers. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/configure trunk/faxpp/configure.in trunk/faxpp/docs/Doxyfile.api Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-12-04 14:29:33 UTC (rev 54) +++ trunk/faxpp/Makefile.am 2008-12-04 14:30:16 UTC (rev 55) @@ -7,7 +7,7 @@ lib_LTLIBRARIES = libfaxpp.la # Library version, see http://sourceware.org/autobook/autobook/autobook_91.html -libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined +libfaxpp_la_LDFLAGS = -version-info 2:1:0 -no-undefined libfaxpp_la_SOURCES = \ src/attr_states.h \ Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-12-04 14:29:33 UTC (rev 54) +++ trunk/faxpp/Makefile.in 2008-12-04 14:30:16 UTC (rev 55) @@ -234,7 +234,7 @@ lib_LTLIBRARIES = libfaxpp.la # Library version, see http://sourceware.org/autobook/autobook/autobook_91.html -libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined +libfaxpp_la_LDFLAGS = -version-info 2:1:0 -no-undefined libfaxpp_la_SOURCES = \ src/attr_states.h \ src/buffer.c \ Modified: trunk/faxpp/configure =================================================================== --- trunk/faxpp/configure 2008-12-04 14:29:33 UTC (rev 54) +++ trunk/faxpp/configure 2008-12-04 14:30:16 UTC (rev 55) @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.61 for faxpp 0.3. +# Generated by GNU Autoconf 2.61 for faxpp 0.4. # # Report bugs to <jo...@sn...>. # @@ -728,8 +728,8 @@ # Identity of this package. 
PACKAGE_NAME='faxpp' PACKAGE_TARNAME='faxpp' -PACKAGE_VERSION='0.3' -PACKAGE_STRING='faxpp 0.3' +PACKAGE_VERSION='0.4' +PACKAGE_STRING='faxpp 0.4' PACKAGE_BUGREPORT='jo...@sn...' ac_unique_file="include/faxpp/parser.h" @@ -1393,7 +1393,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures faxpp 0.3 to adapt to many kinds of systems. +\`configure' configures faxpp 0.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1463,7 +1463,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of faxpp 0.3:";; + short | recursive ) echo "Configuration of faxpp 0.4:";; esac cat <<\_ACEOF @@ -1567,7 +1567,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -faxpp configure 0.3 +faxpp configure 0.4 generated by GNU Autoconf 2.61 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1581,7 +1581,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by faxpp $as_me 0.3, which was +It was created by faxpp $as_me 0.4, which was generated by GNU Autoconf 2.61. Invocation command line was $ $0 $@ @@ -2271,7 +2271,7 @@ # Define the identity of the package. PACKAGE=faxpp - VERSION=0.3 + VERSION=0.4 # Some tools Automake needs. @@ -22081,7 +22081,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by faxpp $as_me 0.3, which was +This file was extended by faxpp $as_me 0.4, which was generated by GNU Autoconf 2.61. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -22134,7 +22134,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -faxpp config.status 0.3 +faxpp config.status 0.4 configured by $0, generated by GNU Autoconf 2.61, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" Modified: trunk/faxpp/configure.in =================================================================== --- trunk/faxpp/configure.in 2008-12-04 14:29:33 UTC (rev 54) +++ trunk/faxpp/configure.in 2008-12-04 14:30:16 UTC (rev 55) @@ -2,9 +2,9 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) -AC_INIT(faxpp, 0.3, jo...@sn...) +AC_INIT(faxpp, 0.4, jo...@sn...) AC_CONFIG_AUX_DIR(autotools) -AM_INIT_AUTOMAKE(faxpp, 0.3, jo...@sn...) +AM_INIT_AUTOMAKE(faxpp, 0.4, jo...@sn...) AC_CONFIG_SRCDIR([include/faxpp/parser.h]) AM_CONFIG_HEADER([src/config.h]) Modified: trunk/faxpp/docs/Doxyfile.api =================================================================== --- trunk/faxpp/docs/Doxyfile.api 2008-12-04 14:29:33 UTC (rev 54) +++ trunk/faxpp/docs/Doxyfile.api 2008-12-04 14:30:16 UTC (rev 55) @@ -31,7 +31,7 @@ # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 0.3 +PROJECT_NUMBER = 0.4 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-12-04 14:29:39
|
Revision: 54 http://faxpp.svn.sourceforge.net/faxpp/?rev=54&view=rev Author: jpcs Date: 2008-12-04 14:29:33 +0000 (Thu, 04 Dec 2008) Log Message: ----------- Fixed documentation. Modified Paths: -------------- trunk/faxpp/include/faxpp/event.h Modified: trunk/faxpp/include/faxpp/event.h =================================================================== --- trunk/faxpp/include/faxpp/event.h 2008-08-08 11:25:17 UTC (rev 53) +++ trunk/faxpp/include/faxpp/event.h 2008-12-04 14:29:33 UTC (rev 54) @@ -96,8 +96,8 @@ FAXPP_Attribute *attrs; ///< Array of attributes. \details Present for event types START_ELEMENT_EVENT, and SELF_CLOSING_ELEMENT_EVENT FAXPP_Text version; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT - FAXPP_Text encoding; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT - FAXPP_Text standalone; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type + FAXPP_Text encoding; ///< The encoding of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT + FAXPP_Text standalone; ///< The standalone status of the event. \details Present only for the START_DOCUMENT_EVENT event type FAXPP_Text system_id; ///< The system literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT FAXPP_Text public_id; ///< The public ID literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-08-08 11:25:08
|
Revision: 53 http://faxpp.svn.sourceforge.net/faxpp/?rev=53&view=rev Author: jpcs Date: 2008-08-08 11:25:17 +0000 (Fri, 08 Aug 2008) Log Message: ----------- Fixed a bug where parsing would not resume properly after certain error cases. Fixed a freed memory read bug in attribute normalization. Modified Paths: -------------- trunk/faxpp/src/buffer.c trunk/faxpp/src/buffer.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Modified: trunk/faxpp/src/buffer.c =================================================================== --- trunk/faxpp/src/buffer.c 2008-05-22 16:39:07 UTC (rev 52) +++ trunk/faxpp/src/buffer.c 2008-08-08 11:25:17 UTC (rev 53) @@ -91,6 +91,20 @@ return NO_ERROR; } +FAXPP_Error FAXPP_buffer_append_text(FAXPP_Buffer *buffer, const FAXPP_Text *text) +{ + FAXPP_Error err; + if(buffer->cursor + text->len > buffer->buffer + buffer->length) { + err = FAXPP_resize_buffer(buffer, (buffer->cursor + text->len) - buffer->buffer); + if(err != 0) return err; + } + + memcpy(buffer->cursor, text->ptr, text->len); + buffer->cursor += text->len; + + return NO_ERROR; +} + FAXPP_Error FAXPP_buffer_append_ch(FAXPP_Buffer *buffer, FAXPP_EncodeFunction encode, Char32 ch) { FAXPP_Error err; Modified: trunk/faxpp/src/buffer.h =================================================================== --- trunk/faxpp/src/buffer.h 2008-05-22 16:39:07 UTC (rev 52) +++ trunk/faxpp/src/buffer.h 2008-08-08 11:25:17 UTC (rev 53) @@ -19,6 +19,7 @@ #include <faxpp/error.h> #include <faxpp/transcode.h> +#include <faxpp/text.h> typedef struct FAXPP_Buffer_s FAXPP_Buffer; typedef void (*FAXPP_BufferResizeCallback)(void *userData, FAXPP_Buffer *buffer, void *newFAXPP_Buffer); @@ -42,6 +43,7 @@ FAXPP_Error FAXPP_resize_buffer(FAXPP_Buffer *buffer, unsigned int minSize); FAXPP_Error FAXPP_buffer_append(FAXPP_Buffer *buffer, void *ptr, unsigned int len); +FAXPP_Error FAXPP_buffer_append_text(FAXPP_Buffer *buffer, const FAXPP_Text *text); FAXPP_Error FAXPP_buffer_append_ch(FAXPP_Buffer 
*buffer, FAXPP_EncodeFunction encode, Char32 ch); #endif Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-05-22 16:39:07 UTC (rev 52) +++ trunk/faxpp/src/xml_parser.c 2008-08-08 11:25:17 UTC (rev 53) @@ -273,6 +273,7 @@ env->current_notation = 0; env->standalone = 0; + env->event_returned = 0; env->xml_version = XML_VERSION_NOT_KNOWN; // Put the element info objects back in the pool @@ -389,7 +390,14 @@ FAXPP_Error FAXPP_next_event(FAXPP_Parser *env) { - return env->next_event(env); + FAXPP_Error err; + do { + err = env->next_event(env); + if(err != NO_ERROR) return err; + } while(env->event.type == NO_EVENT); + + env->event_returned = 1; + return err; } const FAXPP_Event *FAXPP_get_current_event(const FAXPP_Parser *parser) @@ -681,16 +689,20 @@ { FAXPP_Error err; - text->ptr = buffer->cursor; + text->len = 0; while(value) { - err = FAXPP_buffer_append(buffer, value->value.ptr, value->value.len); + err = FAXPP_buffer_append_text(buffer, &value->value); if(err != NO_ERROR) return err; + + text->len += value->value.len; value = value->next; } - - text->len = buffer->cursor - text->ptr; + // Work out the pointer from the calculated length - since a buffer + // reallocation won't necessarily update text->ptr + text->ptr = buffer->cursor - text->len; + if(env->tenv->null_terminate) return FAXPP_buffer_append_ch(buffer, env->tenv->transcoder.encode, 0); @@ -883,6 +895,9 @@ // Reset the event buffer cursor FAXPP_reset_buffer(&env->event_buffer); + // This event has not been returned yet + env->event_returned = 0; + // Clear the event env->event.type = NO_EVENT; @@ -1188,6 +1203,8 @@ static FAXPP_Error p_create_entity_info(FAXPP_ParserEnv *env, FAXPP_EntityInfo **list) { + const FAXPP_Text *text; + FAXPP_EntityInfo *ent = (FAXPP_EntityInfo*)malloc(sizeof(FAXPP_EntityInfo)); if(!ent) return OUT_OF_MEMORY; @@ -1200,7 +1217,10 @@ } *list = ent; - 
p_set_text_from_text(&ent->base_uri, FAXPP_get_base_uri(env)); + text = FAXPP_get_base_uri(env); + if(text != 0) { + p_set_text_from_text(&ent->base_uri, text); + } p_force_copy_text_from_token(&ent->name, env, &env->entity_buffer); p_set_location_from_token(ent, env); @@ -1863,7 +1883,7 @@ p_set_event_location_from_token(env); break; case START_ELEMENT_NAME_TOKEN: - if(env->event.type != NO_EVENT) { + if(env->event_returned == 1) { p_reset_event(env); } p_copy_text_from_token(&env->event.name, env, /*useTokenBuffer*/0); @@ -1926,7 +1946,7 @@ p_set_event_location_from_token(env); break; case END_ELEMENT_NAME_TOKEN: - if(env->event.type != NO_EVENT) { + if(env->event_returned == 1) { p_reset_event(env); } p_copy_text_from_token(&env->event.name, env, /*useTokenBuffer*/0); @@ -2183,7 +2203,6 @@ if(attr->prefix.len != 0) { p_copy_text_from_event(&nsinfo->prefix, &attr->name, env, &env->element_info_stack->buffer); } - return NO_ERROR; } Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-05-22 16:39:07 UTC (rev 52) +++ trunk/faxpp/src/xml_parser.h 2008-08-08 11:25:17 UTC (rev 53) @@ -118,6 +118,7 @@ unsigned int current_notation:1; unsigned int standalone:1; + unsigned int event_returned:1; enum { XML_VERSION_NOT_KNOWN = 0, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-05-22 16:39:05
|
Revision: 52 http://faxpp.svn.sourceforge.net/faxpp/?rev=52&view=rev Author: jpcs Date: 2008-05-22 09:39:07 -0700 (Thu, 22 May 2008) Log Message: ----------- Fixed a bug in entity replacement when the output encoding is not UTF-8. Added the examples to the documentation. Modified Paths: -------------- trunk/faxpp/Makefile.in trunk/faxpp/aclocal.m4 trunk/faxpp/configure trunk/faxpp/configure.in trunk/faxpp/docs/Doxyfile.api trunk/faxpp/include/Makefile.in trunk/faxpp/include/faxpp/parser.h trunk/faxpp/include/faxpp/tokenizer.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-27 15:18:13 UTC (rev 51) +++ trunk/faxpp/Makefile.in 2008-05-22 16:39:07 UTC (rev 52) @@ -1,8 +1,8 @@ -# Makefile.in generated by automake 1.10 from Makefile.am. +# Makefile.in generated by automake 1.10.1 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -81,7 +81,7 @@ entity_resolver.$(OBJEXT) output_event.$(OBJEXT) xmlconf_runner_OBJECTS = $(am_xmlconf_runner_OBJECTS) xmlconf_runner_DEPENDENCIES = libfaxpp.la -DEFAULT_INCLUDES = -I. 
-I$(top_builddir)/src@am__isrc@ +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src depcomp = $(SHELL) $(top_srcdir)/autotools/depcomp am__depfiles_maybe = depfiles COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ @@ -139,6 +139,7 @@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ @@ -161,6 +162,7 @@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ +NMEDIT = @NMEDIT@ OBJEXT = @OBJEXT@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ @@ -170,6 +172,7 @@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ RANLIB = @RANLIB@ +SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ @@ -349,8 +352,8 @@ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ if test -f $$p; then \ f=$(am__strip_dir) \ - echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ - $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ else :; fi; \ done @@ -358,8 +361,8 @@ @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ p=$(am__strip_dir) \ - echo " $(LIBTOOL) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \ - $(LIBTOOL) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \ done clean-libLTLIBRARIES: @@ -744,8 +747,8 @@ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo 
$(srcdir)/$$i; fi; \ done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ + $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS @@ -770,8 +773,8 @@ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ @@ -781,13 +784,12 @@ CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) tags=; \ - here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$tags$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$tags $$unique @@ -858,6 +860,10 @@ tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 $(am__remove_distdir) +dist-lzma: distdir + tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma + $(am__remove_distdir) + dist-tarZ: distdir tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__remove_distdir) @@ -885,6 +891,8 @@ GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ + *.tar.lzma*) \ + unlzma -c $(distdir).tar.lzma | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ @@ -1043,17 +1051,17 @@ .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS 
\ all all-am am--refresh check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool clean-noinstPROGRAMS ctags \ - ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-shar \ - dist-tarZ dist-zip distcheck distclean distclean-compile \ - distclean-generic distclean-hdr distclean-libtool \ - distclean-tags distcleancheck distdir distuninstallcheck dvi \ - dvi-am html html-am info info-am install install-am \ - install-data install-data-am install-dvi install-dvi-am \ - install-exec install-exec-am install-html install-html-am \ - install-info install-info-am install-libLTLIBRARIES \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installdirs installdirs-am maintainer-clean \ + ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-lzma \ + dist-shar dist-tarZ dist-zip distcheck distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-libtool distclean-tags distcleancheck distdir \ + distuninstallcheck dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am \ + install-libLTLIBRARIES install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-recursive uninstall uninstall-am \ Modified: trunk/faxpp/aclocal.m4 =================================================================== --- trunk/faxpp/aclocal.m4 2008-03-27 15:18:13 UTC (rev 51) +++ trunk/faxpp/aclocal.m4 2008-05-22 16:39:07 UTC (rev 52) @@ -1,7 +1,7 @@ -# generated automatically by aclocal 1.10 -*- Autoconf -*- +# generated automatically by aclocal 1.10.1 -*- Autoconf -*- # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 
2002, 2003, 2004, -# 2005, 2006 Free Software Foundation, Inc. +# 2005, 2006, 2007, 2008 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -11,14 +11,17 @@ # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. -m4_if(m4_PACKAGE_VERSION, [2.61],, -[m4_fatal([this file was generated for autoconf 2.61. -You have another version of autoconf. If you want to use that, -you should regenerate the build system entirely.], [63])]) +m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl +m4_if(AC_AUTOCONF_VERSION, [2.61],, +[m4_warning([this file was generated for autoconf 2.61. +You have another version of autoconf. It may work, but is not guaranteed to. +If you have problems, you may need to regenerate the build system entirely. +To do so, use the procedure documented by the package, typically `autoreconf'.])]) # libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- -# serial 48 Debian 1.5.22-4 AC_PROG_LIBTOOL +# serial 52 Debian 1.5.26-1ubuntu1 AC_PROG_LIBTOOL # AC_PROVIDE_IFELSE(MACRO-NAME, IF-PROVIDED, IF-NOT-PROVIDED) @@ -106,7 +109,6 @@ AC_REQUIRE([AC_OBJEXT])dnl AC_REQUIRE([AC_EXEEXT])dnl dnl - AC_LIBTOOL_SYS_MAX_CMD_LEN AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE AC_LIBTOOL_OBJDIR @@ -181,7 +183,7 @@ test -z "$ac_objext" && ac_objext=o # Determine commands to create old-style static archives. 
-old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs' +old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= @@ -208,6 +210,8 @@ ;; esac +_LT_REQUIRED_DARWIN_CHECKS + AC_PROVIDE_IFELSE([AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no) AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL], enable_win32_dll=yes, enable_win32_dll=no) @@ -268,8 +272,9 @@ # Check for compiler boilerplate output or warnings with # the simple compiler test code. AC_DEFUN([_LT_COMPILER_BOILERPLATE], -[ac_outfile=conftest.$ac_objext -printf "$lt_simple_compile_test_code" >conftest.$ac_ext +[AC_REQUIRE([LT_AC_PROG_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $rm conftest* @@ -281,14 +286,86 @@ # Check for linker boilerplate output or warnings with # the simple link test code. AC_DEFUN([_LT_LINKER_BOILERPLATE], -[ac_outfile=conftest.$ac_objext -printf "$lt_simple_link_test_code" >conftest.$ac_ext +[AC_REQUIRE([LT_AC_PROG_SED])dnl +ac_outfile=conftest.$ac_objext +echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` -$rm conftest* +$rm -r conftest* ])# _LT_LINKER_BOILERPLATE +# _LT_REQUIRED_DARWIN_CHECKS +# -------------------------- +# Check for some things on darwin +AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS],[ + case $host_os in + rhapsody* | darwin*) + AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:]) + AC_CHECK_TOOL([NMEDIT], [nmedit], [:]) + AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod], + [lt_cv_apple_cc_single_mod=no + if test -z "${LT_MULTI_MODULE}"; then + # By default we will add the -single_module flag. 
You can override + # by either setting the environment variable LT_MULTI_MODULE + # non-empty at configure time, or by adding -multi_module to the + # link flags. + echo "int foo(void){return 1;}" > conftest.c + $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ + -dynamiclib ${wl}-single_module conftest.c + if test -f libconftest.dylib; then + lt_cv_apple_cc_single_mod=yes + rm -rf libconftest.dylib* + fi + rm conftest.c + fi]) + AC_CACHE_CHECK([for -exported_symbols_list linker flag], + [lt_cv_ld_exported_symbols_list], + [lt_cv_ld_exported_symbols_list=no + save_LDFLAGS=$LDFLAGS + echo "_main" > conftest.sym + LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" + AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], + [lt_cv_ld_exported_symbols_list=yes], + [lt_cv_ld_exported_symbols_list=no]) + LDFLAGS="$save_LDFLAGS" + ]) + case $host_os in + rhapsody* | darwin1.[[0123]]) + _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; + darwin1.*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + darwin*) + # if running on 10.5 or later, the deployment target defaults + # to the OS version, if on x86, and 10.4, the deployment + # target defaults to 10.4. Don't you love it? 
+ case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in + 10.0,*86*-darwin8*|10.0,*-darwin[[91]]*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + 10.[[012]]*) + _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; + 10.*) + _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; + esac + ;; + esac + if test "$lt_cv_apple_cc_single_mod" = "yes"; then + _lt_dar_single_mod='$single_module' + fi + if test "$lt_cv_ld_exported_symbols_list" = "yes"; then + _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' + else + _lt_dar_export_syms="~$NMEDIT -s \$output_objdir/\${libname}-symbols.expsym \${lib}" + fi + if test "$DSYMUTIL" != ":"; then + _lt_dsymutil="~$DSYMUTIL \$lib || :" + else + _lt_dsymutil= + fi + ;; + esac +]) + # _LT_AC_SYS_LIBPATH_AIX # ---------------------- # Links a minimal program and checks the executable @@ -298,12 +375,20 @@ # If we don't find anything, use the default library path according # to the aix ld manual. AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX], -[AC_LINK_IFELSE(AC_LANG_PROGRAM,[ -aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; } -}'` +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_LINK_IFELSE(AC_LANG_PROGRAM,[ +lt_aix_libpath_sed=' + /Import File Strings/,/^$/ { + /^0/ { + s/^0 *\(.*\)$/\1/ + p + } + }' +aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. 
-if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; } -}'`; fi],[]) +if test -z "$aix_libpath"; then + aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` +fi],[]) if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi ])# _LT_AC_SYS_LIBPATH_AIX @@ -534,13 +619,17 @@ rm -rf conftest* ;; -x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*) +x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ +s390*-*linux*|sparc*-*linux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.o` in *32-bit*) case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_i386_fbsd" + ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; @@ -557,6 +646,9 @@ ;; *64-bit*) case $host in + x86_64-*kfreebsd*-gnu) + LD="${LD-ld} -m elf_x86_64_fbsd" + ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; @@ -597,7 +689,11 @@ *64-bit*) case $lt_cv_prog_gnu_ld in yes*) LD="${LD-ld} -m elf64_sparc" ;; - *) LD="${LD-ld} -64" ;; + *) + if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then + LD="${LD-ld} -64" + fi + ;; esac ;; esac @@ -628,7 +724,7 @@ AC_CACHE_CHECK([$1], [$2], [$2=no ifelse([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) - printf "$lt_simple_compile_test_code" > conftest.$ac_ext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$3" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. 
@@ -669,11 +765,12 @@ # ------------------------------------------------------------ # Check whether the given compiler option works AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], -[AC_CACHE_CHECK([$1], [$2], +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_CACHE_CHECK([$1], [$2], [$2=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $3" - printf "$lt_simple_link_test_code" > conftest.$ac_ext + echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings @@ -689,7 +786,7 @@ $2=yes fi fi - $rm conftest* + $rm -r conftest* LDFLAGS="$save_LDFLAGS" ]) @@ -787,24 +884,27 @@ fi ;; *) - # If test is not a shell built-in, we'll probably end up computing a - # maximum length that is only half of the actual maximum length, but - # we can't tell. - SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} - while (test "X"`$SHELL [$]0 --fallback-echo "X$teststring" 2>/dev/null` \ + lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` + if test -n "$lt_cv_sys_max_cmd_len"; then + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + else + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + while (test "X"`$SHELL [$]0 --fallback-echo "X$teststring" 2>/dev/null` \ = "XX$teststring") >/dev/null 2>&1 && - new_result=`expr "X$teststring" : ".*" 2>&1` && - lt_cv_sys_max_cmd_len=$new_result && - test $i != 17 # 1/2 MB should be enough - do - i=`expr $i + 1` - teststring=$teststring$teststring - done - teststring= - # Add a significant safety factor because C++ compilers can tack on massive - # amounts of additional arguments before passing them to the linker. - # It appears as though 1/2 is a usable value. 
- lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + new_result=`expr "X$teststring" : ".*" 2>&1` && + lt_cv_sys_max_cmd_len=$new_result && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + teststring= + # Add a significant safety factor because C++ compilers can tack on massive + # amounts of additional arguments before passing them to the linker. + # It appears as though 1/2 is a usable value. + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + fi ;; esac ]) @@ -957,7 +1057,7 @@ AC_CHECK_FUNC([shl_load], [lt_cv_dlopen="shl_load"], [AC_CHECK_LIB([dld], [shl_load], - [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"], [AC_CHECK_FUNC([dlopen], [lt_cv_dlopen="dlopen"], [AC_CHECK_LIB([dl], [dlopen], @@ -965,7 +1065,7 @@ [AC_CHECK_LIB([svld], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], [AC_CHECK_LIB([dld], [dld_link], - [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"]) ]) ]) ]) @@ -1031,7 +1131,8 @@ # --------------------------------- # Check to see if options -c and -o are simultaneously supported by compiler AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O], -[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)], [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no @@ -1039,7 +1140,7 @@ mkdir conftest cd conftest mkdir out - printf "$lt_simple_compile_test_code" > conftest.$ac_ext + echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or @@ -1179,6 +1280,7 @@ darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" + old_striplib="$STRIP -S" AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) @@ -1196,7 +1298,8 @@ # 
----------------------------- # PORTME Fill in your ld.so characteristics AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER], -[AC_MSG_CHECKING([dynamic linker characteristics]) +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_MSG_CHECKING([dynamic linker characteristics]) library_names_spec= libname_spec='lib$name' soname_spec= @@ -1210,20 +1313,58 @@ version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" +m4_if($1,[],[ if test "$GCC" = yes; then - sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` - if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then + case $host_os in + darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; + *) lt_awk_arg="/^libraries:/" ;; + esac + lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e "s,=/,/,g"` + if echo "$lt_search_path_spec" | grep ';' >/dev/null ; then # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. - sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e 's/;/ /g'` else - sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi + # Ok, now we have the path, separated by spaces, we can step through it + # and add multilib dir if necessary. 
+ lt_tmp_lt_search_path_spec= + lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` + for lt_sys_path in $lt_search_path_spec; do + if test -d "$lt_sys_path/$lt_multi_os_dir"; then + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" + else + test -d "$lt_sys_path" && \ + lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" + fi + done + lt_search_path_spec=`echo $lt_tmp_lt_search_path_spec | awk ' +BEGIN {RS=" "; FS="/|\n";} { + lt_foo=""; + lt_count=0; + for (lt_i = NF; lt_i > 0; lt_i--) { + if ($lt_i != "" && $lt_i != ".") { + if ($lt_i == "..") { + lt_count++; + } else { + if (lt_count == 0) { + lt_foo="/" $lt_i lt_foo; + } else { + lt_count--; + } + } + } + } + if (lt_foo != "") { lt_freq[[lt_foo]]++; } + if (lt_freq[[lt_foo]] == 1) { print lt_foo; } +}'` + sys_lib_search_path_spec=`echo $lt_search_path_spec` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" -fi +fi]) need_lib_prefix=unknown hardcode_into_libs=no @@ -1241,7 +1382,7 @@ soname_spec='${libname}${release}${shared_ext}$major' ;; -aix4* | aix5*) +aix[[4-9]]*) version_type=linux need_lib_prefix=no need_version=no @@ -1380,12 +1521,8 @@ shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' - # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same. 
- if test "$GCC" = yes; then - sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"` - else - sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib' - fi + m4_if([$1], [],[ + sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; @@ -1439,7 +1576,7 @@ shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; - freebsd*) # from 4.6 on + *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; @@ -1502,7 +1639,7 @@ postinstall_cmds='chmod 555 $lib' ;; -interix3*) +interix[[3-9]]*) version_type=linux need_lib_prefix=no need_version=no @@ -1573,7 +1710,7 @@ # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then - lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi @@ -1679,6 +1816,10 @@ sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; +rdos*) + dynamic_linker=no + ;; + solaris*) version_type=linux need_lib_prefix=no @@ -1774,6 +1915,13 @@ AC_MSG_RESULT([$dynamic_linker]) test "$dynamic_linker" = no && can_build_shared=no +AC_CACHE_VAL([lt_cv_sys_lib_search_path_spec], +[lt_cv_sys_lib_search_path_spec="$sys_lib_search_path_spec"]) +sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" +AC_CACHE_VAL([lt_cv_sys_lib_dlsearch_path_spec], 
+[lt_cv_sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec"]) +sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" + variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" @@ -1784,7 +1932,8 @@ # _LT_AC_TAGCONFIG # ---------------- AC_DEFUN([_LT_AC_TAGCONFIG], -[AC_ARG_WITH([tags], +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_ARG_WITH([tags], [AC_HELP_STRING([--with-tags@<:@=TAGS@:>@], [include additional configurations @<:@automatic@:>@])], [tagnames="$withval"]) @@ -2045,7 +2194,7 @@ # AC_PATH_TOOL_PREFIX # ------------------- -# find a file program which can recognise shared library +# find a file program which can recognize shared library AC_DEFUN([AC_PATH_TOOL_PREFIX], [AC_REQUIRE([AC_PROG_EGREP])dnl AC_MSG_CHECKING([for $1]) @@ -2108,7 +2257,7 @@ # AC_PATH_MAGIC # ------------- -# find a file program which can recognise a shared library +# find a file program which can recognize a shared library AC_DEFUN([AC_PATH_MAGIC], [AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) if test -z "$lt_cv_path_MAGIC_CMD"; then @@ -2255,7 +2404,7 @@ # how to check for library dependencies # -- PORTME fill in with the dynamic library characteristics AC_DEFUN([AC_DEPLIBS_CHECK_METHOD], -[AC_CACHE_CHECK([how to recognise dependent libraries], +[AC_CACHE_CHECK([how to recognize dependent libraries], lt_cv_deplibs_check_method, [lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= @@ -2272,7 +2421,7 @@ # whether `pass_all' will *always* work, you probably want this one. case $host_os in -aix4* | aix5*) +aix[[4-9]]*) lt_cv_deplibs_check_method=pass_all ;; @@ -2294,9 +2443,15 @@ mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by - # func_win32_libid shell function, so use a weaker test based on 'objdump'. 
- lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' - lt_cv_file_magic_cmd='$OBJDUMP -f' + # func_win32_libid shell function, so use a weaker test based on 'objdump', + # unless we find 'file', for example because we are cross-compiling. + if ( file / ) >/dev/null 2>&1; then + lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' + lt_cv_file_magic_cmd='func_win32_libid' + else + lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' + lt_cv_file_magic_cmd='$OBJDUMP -f' + fi ;; darwin* | rhapsody*) @@ -2341,7 +2496,7 @@ esac ;; -interix3*) +interix[[3-9]]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' ;; @@ -2391,6 +2546,10 @@ lt_cv_deplibs_check_method=pass_all ;; +rdos*) + lt_cv_deplibs_check_method=pass_all + ;; + solaris*) lt_cv_deplibs_check_method=pass_all ;; @@ -2443,7 +2602,7 @@ lt_cv_path_NM="$NM" else lt_nm_to_check="${ac_tool_prefix}nm" - if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do @@ -2659,10 +2818,10 @@ _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests -lt_simple_compile_test_code="int some_variable = 0;\n" +lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests -lt_simple_link_test_code='int main(){return(0);}\n' +lt_simple_link_test_code='int main(){return(0);}' _LT_AC_SYS_COMPILER @@ -2698,7 +2857,7 @@ fi ;; -aix4* | aix5*) +aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi @@ -2755,6 +2914,7 @@ _LT_AC_TAGVAR(predeps, $1)= _LT_AC_TAGVAR(postdeps, $1)= _LT_AC_TAGVAR(compiler_lib_search_path, $1)= +_LT_AC_TAGVAR(compiler_lib_search_dirs, $1)= # Source file extension for C++ test 
sources. ac_ext=cpp @@ -2764,10 +2924,10 @@ _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests -lt_simple_compile_test_code="int some_variable = 0;\n" +lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests -lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }\n' +lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_AC_SYS_COMPILER @@ -2864,7 +3024,7 @@ # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; - aix4* | aix5*) + aix[[4-9]]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. @@ -2877,7 +3037,7 @@ # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. - case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) @@ -2913,7 +3073,7 @@ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 - _LT_AC_TAGVAR(hardcode_direct, $1)=yes + : else # We have old collect2 _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported @@ -3023,59 +3183,31 @@ fi ;; darwin* | rhapsody*) - case $host_os in - rhapsody* | darwin1.[[012]]) - _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress' - ;; - *) # Darwin 1.3 on - if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then - _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' - else - case ${MACOSX_DEPLOYMENT_TARGET} in - 10.[[012]]) - _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' - ;; - 10.*) - _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup' - ;; - esac - fi - ;; - esac _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no 
_LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_automatic, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes - - if test "$GXX" = yes ; then - lt_int_apple_cc_single_mod=no + _LT_AC_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined" + if test "$GXX" = yes ; then output_verbose_link_cmd='echo' - if $CC -dumpspecs 2>&1 | $EGREP 'single_module' >/dev/null ; then - lt_int_apple_cc_single_mod=yes + _LT_AC_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" + _LT_AC_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" + _LT_AC_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" + if test "$lt_cv_apple_cc_single_mod" != "yes"; then + _LT_AC_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}" + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags 
-install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}" fi - if test "X$lt_int_apple_cc_single_mod" = Xyes ; then - _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' else - _LT_AC_TAGVAR(archive_cmds, $1)='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring' - fi - _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' - # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds - if test "X$lt_int_apple_cc_single_mod" = Xyes ; then - _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' - else - _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' - fi - _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' - else case $cc_basename in xlc*) output_verbose_link_cmd='echo' - _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj 
${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring' _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds - _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) @@ -3158,9 +3290,7 @@ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in - hppa*64*|ia64*) - _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir' - ;; + hppa*64*|ia64*) ;; *) _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; @@ -3228,7 +3358,7 @@ ;; esac ;; - interix3*) + interix[[3-9]]*) _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no 
_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' @@ -3320,7 +3450,7 @@ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; - pgCC*) + pgCC* | pgcpp*) # Portland Group C++ compiler _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' @@ -3348,6 +3478,29 @@ # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_AC_TAGVAR(no_undefined_flag, $1)=' -zdefs' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' + + # Not sure whether something based on + # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 + # would be better. 
+ output_verbose_link_cmd='echo' + + # Archives containing C++ object files must be created using + # "CC -xar", where "CC" is the Sun C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. + _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' + ;; + esac + ;; esac ;; lynxos*) @@ -3386,16 +3539,20 @@ _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; openbsd*) - _LT_AC_TAGVAR(hardcode_direct, $1)=yes - _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no - _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' - _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' - if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then - _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' - _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' - _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + if test -f /usr/libexec/ld.so; then + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd='echo' + 
else + _LT_AC_TAGVAR(ld_shlibs, $1)=no fi - output_verbose_link_cmd='echo' ;; osf3*) case $cc_basename in @@ -3557,15 +3714,10 @@ case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) - # The C++ compiler is used as linker so we must use $wl - # flag to pass the commands to the underlying system - # linker. We must also pass each convience library through - # to the system linker between allextract/defaultextract. - # The C++ compiler will combine linker options so we - # cannot just pass the convience library names through - # without $wl. + # The compiler driver will combine and reorder linker options, + # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) - _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;; esac _LT_AC_TAGVAR(link_all_deplibs, $1)=yes @@ -3612,6 +3764,12 @@ fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' + case $host_os in + solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; + *) + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' + ;; + esac fi ;; esac @@ -3727,7 +3885,8 @@ # compiler output when linking a shared library. # Parse the compiler output and extract the necessary # objects, libraries and library flags. -AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP],[ +AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP], +[AC_REQUIRE([LT_AC_PROG_SED])dnl dnl we can't use the lt_simple_compile_test_code here, dnl because it contains code intended for an executable, dnl not a library. 
It's possible we should let each @@ -3852,10 +4011,15 @@ $rm -f confest.$objext +_LT_AC_TAGVAR(compiler_lib_search_dirs, $1)= +if test -n "$_LT_AC_TAGVAR(compiler_lib_search_path, $1)"; then + _LT_AC_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_AC_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'` +fi + # PORTME: override above test on systems where it is broken ifelse([$1],[CXX], [case $host_os in -interix3*) +interix[[3-9]]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. _LT_AC_TAGVAR(predep_objects,$1)= @@ -3863,19 +4027,51 @@ _LT_AC_TAGVAR(postdeps,$1)= ;; +linux*) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + # + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + if test "$solaris_use_stlport4" != yes; then + _LT_AC_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi + ;; + esac + ;; + solaris*) case $cc_basename in CC*) + # The more standards-conforming stlport4 library is + # incompatible with the Cstd library. Avoid specifying + # it if it's in CXXFLAGS. Ignore libCrun as + # -library=stlport4 depends on it. + case " $CXX $CXXFLAGS " in + *" -library=stlport4 "*) + solaris_use_stlport4=yes + ;; + esac + # Adding this requires a known-good setup of shared libraries for # Sun compiler versions before 5.6, else PIC objects from an old # archive will be linked into the output, leading to subtle bugs. 
- _LT_AC_TAGVAR(postdeps,$1)='-lCstd -lCrun' + if test "$solaris_use_stlport4" != yes; then + _LT_AC_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' + fi ;; esac ;; esac ]) - case " $_LT_AC_TAGVAR(postdeps, $1) " in *" -lc "*) _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no ;; esac @@ -3918,10 +4114,17 @@ _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests -lt_simple_compile_test_code=" subroutine t\n return\n end\n" +lt_simple_compile_test_code="\ + subroutine t + return + end +" # Code to be used in simple link tests -lt_simple_link_test_code=" program t\n end\n" +lt_simple_link_test_code="\ + program t + end +" # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_AC_SYS_COMPILER @@ -3953,7 +4156,7 @@ postinstall_cmds='$RANLIB $lib' fi ;; -aix4* | aix5*) +aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi @@ -4000,10 +4203,10 @@ _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests -lt_simple_compile_test_code="class foo {}\n" +lt_simple_compile_test_code="class foo {}" # Code to be used in simple link tests -lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }\n' +lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. 
_LT_AC_SYS_COMPILER @@ -4056,7 +4259,7 @@ _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests -lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }\n' +lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' # Code to be used in simple link tests lt_simple_link_test_code="$lt_simple_compile_test_code" @@ -4130,6 +4333,7 @@ _LT_AC_TAGVAR(predeps, $1) \ _LT_AC_TAGVAR(postdeps, $1) \ _LT_AC_TAGVAR(compiler_lib_search_path, $1) \ + _LT_AC_TAGVAR(compiler_lib_search_dirs, $1) \ _LT_AC_TAGVAR(archive_cmds, $1) \ _LT_AC_TAGVAR(archive_expsym_cmds, $1) \ _LT_AC_TAGVAR(postinstall_cmds, $1) \ @@ -4145,6 +4349,7 @@ _LT_AC_TAGVAR(module_cmds, $1) \ _LT_AC_TAGVAR(module_expsym_cmds, $1) \ _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1) \ + _LT_AC_TAGVAR(fix_srcfile_path, $1) \ _LT_AC_TAGVAR(exclude_expsyms, $1) \ _LT_AC_TAGVAR(include_expsyms, $1); do @@ -4191,7 +4396,7 @@ # Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) # NOTE: Changes made to this file will be lost: look at ltmain.sh. # -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 # Free Software Foundation, Inc. # # This file is part of GNU Libtool: @@ -4428,6 +4633,10 @@ # shared library. postdeps=$lt_[]_LT_AC_TAGVAR(postdeps, $1) +# The directories searched by this compiler when creating a shared +# library +compiler_lib_search_dirs=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_dirs, $1) + # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) @@ -4516,7 +4725,7 @@ sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. -fix_srcfile_path="$_LT_AC_TAGVAR(fix_srcfile_path, $1)" +fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. 
always_export_symbols=$_LT_AC_TAGVAR(always_export_symbols, $1) @@ -4599,6 +4808,7 @@ # --------------------------------- AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([LT_AC_PROG_SED]) AC_REQUIRE([AC_PROG_NM]) AC_REQUIRE([AC_OBJEXT]) # Check for command to grab the raw symbol name followed by C symbol from nm. @@ -4776,7 +4986,7 @@ echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD cat conftest.$ac_ext >&5 fi - rm -f conftest* conftst* + rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test "$pipe_works" = yes; then @@ -4825,13 +5035,16 @@ # like `-m68040'. _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; - beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; - mingw* | os2* | pw32*) + mingw* | cygwin* | os2* | pw32*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). - _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) # PIC is the default on this platform @@ -4842,7 +5055,7 @@ # DJGPP does not support shared libraries at all _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= ;; - interix3*) + interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; @@ -4868,7 +5081,7 @@ esac else case $host_os in - aix4* | aix5*) + aix[[4-9]]*) # All AIX code is PIC. 
if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor @@ -4964,7 +5177,7 @@ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; - pgCC*) + pgCC* | pgcpp*) # Portland Group C++ compiler. _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' @@ -4978,6 +5191,14 @@ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C++ 5.9 + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + esac ;; esac ;; @@ -5098,14 +5319,17 @@ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; - beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; - mingw* | pw32* | os2*) + mingw* | cygwin* | pw32* | os2*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). - _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + # Although the cygwin gcc ignores -fPIC, still need this for old-style + # (--disable-auto-import) libraries + m4_if([$1], [GCJ], [], + [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) @@ -5114,7 +5338,7 @@ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; - interix3*) + interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; @@ -5172,10 +5396,11 @@ esac ;; - mingw* | pw32* | os2*) + mingw* | cygwin* | pw32* | os2*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). 
- _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + m4_if([$1], [GCJ], [], + [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; hpux9* | hpux10* | hpux11*) @@ -5224,6 +5449,22 @@ # All Alpha code is PIC. _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; + *) + case `$CC -V 2>&1 | sed 5q` in + *Sun\ C*) + # Sun C 5.9 + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + *Sun\ F*) + # Sun Fortran 8.3 passes all unrecognized flags to the linker + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='' + ;; + esac + ;; esac ;; @@ -5233,6 +5474,10 @@ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; + rdos*) + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + solaris*) _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' @@ -5292,7 +5537,7 @@ # if test -n "$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)"; then AC_LIBTOOL_COMPILER_OPTION([if $compiler PIC flag $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) works], - _LT_AC_TAGVAR(lt_prog_compiler_pic_works, $1), + _LT_AC_TAGVAR(lt_cv_prog_compiler_pic_works, $1), [$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])], [], [case $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) in "" | " "*) ;; @@ -5316,7 +5561,7 @@ # wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_AC_TAGVAR(lt_prog_compiler_static, $1)\" AC_LIBTOOL_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], - _LT_AC_TAGVAR(lt_prog_compiler_static_works, $1), + _LT_AC_TAGVAR(lt_cv_prog_compiler_static_works, $1), $lt_tmp_static_flag, [], [_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=]) @@ -5327,11 +5572,12 @@ # ------------------------------------ # See if the linker supports building shared libraries. 
AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS], -[AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +[AC_REQUIRE([LT_AC_PROG_SED])dnl +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) ifelse([$1],[CXX],[ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' case $host_os in - aix4* | aix5*) + aix[[4-9]]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then @@ -5344,7 +5590,7 @@ _LT_AC_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" ;; cygwin* | mingw*) - _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/;/^.* __nm__/s/^.* __nm__\([[^ ]]*\) [[^ ]]*/\1 DATA/;/^I /d;/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols' + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;/^.*[[ ]]__nm__/s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' ;; linux* | k*bsd*-gnu) _LT_AC_TAGVAR(link_all_deplibs, $1)=no @@ -5353,6 +5599,7 @@ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac + _LT_AC_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] ],[ runpath_var= _LT_AC_TAGVAR(allow_undefined_flag, $1)= @@ -5383,12 +5630,14 @@ # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. 
- _LT_AC_TAGVAR(exclude_expsyms, $1)="_GLOBAL_OFFSET_TABLE_" + _LT_AC_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. + # Exclude shared library initialization/finalization symbols. +dnl Note also adjust exclude_expsyms for C++ above. extract_expsyms_cmds= # Just being paranoid about ensuring that cc_basename is set. _LT_CC_BASENAME([$compiler]) @@ -5438,7 +5687,7 @@ # See if GNU ld supports shared libraries. case $host_os in - aix3* | aix4* | aix5*) + aix[[3-9]]*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then _LT_AC_TAGVAR(ld_shlibs, $1)=no @@ -5486,7 +5735,7 @@ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_AC_TAGVAR(always_export_symbols, $1)=no _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes - _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/'\'' | $SED -e '\''/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols' + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/'\'' -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' @@ -5504,7 +5753,7 @@ fi ;; - interix3*) + interix[[3-9]]*) _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' @@ -5519,7 +5768,7 @@ _LT... 
[truncated message content] |
From: <jp...@us...> - 2008-03-27 15:18:24
|
Revision: 51 http://faxpp.svn.sourceforge.net/faxpp/?rev=51&view=rev Author: jpcs Date: 2008-03-27 08:18:13 -0700 (Thu, 27 Mar 2008) Log Message: ----------- Fix for building on Windows. Modified Paths: -------------- trunk/faxpp/include/faxpp/transcode.h Modified: trunk/faxpp/include/faxpp/transcode.h =================================================================== --- trunk/faxpp/include/faxpp/transcode.h 2008-03-20 16:03:47 UTC (rev 50) +++ trunk/faxpp/include/faxpp/transcode.h 2008-03-27 15:18:13 UTC (rev 51) @@ -17,7 +17,13 @@ #ifndef __FAXPP__TRANSCODE_H #define __FAXPP__TRANSCODE_H +#ifdef _MSC_VER +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#else #include <stdint.h> +#endif /// A single unicode codepoint typedef uint32_t Char32; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-20 16:03:43
|
Revision: 50 http://faxpp.svn.sourceforge.net/faxpp/?rev=50&view=rev Author: jpcs Date: 2008-03-20 09:03:47 -0700 (Thu, 20 Mar 2008) Log Message: ----------- Tag the 0.3 release. Added Paths: ----------- branch/0.3/ Copied: branch/0.3 (from rev 49, trunk/faxpp) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: John S. <joh...@or...> - 2008-03-20 15:59:41
|
Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. This release is a beta release, to allow users to test the parser and provide feedback on problems. This release includes support for parsing DTDs, and resolving internal and external entities. Faxpp 0.3 can be downloaded from here: http://sourceforge.net/project/showfiles.php?group_id=201903 Full API documentation can be found here: http://faxpp.sourceforge.net/ The full change log can be found here: http://sourceforge.net/project/shownotes.php?release_id=585780&group_id=201903 -- John Snelson, Oracle Corporation http://snelson.org.uk/john Berkeley DB XML: http://www.oracle.com/database/berkeley-db/xml XQilla: http://xqilla.sourceforge.net |
From: <jp...@us...> - 2008-03-20 13:05:34
|
Revision: 49 http://faxpp.svn.sourceforge.net/faxpp/?rev=49&view=rev Author: jpcs Date: 2008-03-20 06:05:36 -0700 (Thu, 20 Mar 2008) Log Message: ----------- Added windows project files to todo list Modified Paths: -------------- trunk/faxpp/TODO Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2008-03-20 02:33:24 UTC (rev 48) +++ trunk/faxpp/TODO 2008-03-20 13:05:36 UTC (rev 49) @@ -1,6 +1,7 @@ Small tasks ----------- +Windows project files Normalize line endings in element character content / PI values / comment values Accept XML 1.1 line endings as whitespace This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-20 02:33:21
|
Revision: 48 http://faxpp.svn.sourceforge.net/faxpp/?rev=48&view=rev Author: jpcs Date: 2008-03-19 19:33:24 -0700 (Wed, 19 Mar 2008) Log Message: ----------- Updated the documentation and change log. Modified Paths: -------------- trunk/faxpp/ChangeLog trunk/faxpp/docs/header.html trunk/faxpp/include/faxpp/parser.h Modified: trunk/faxpp/ChangeLog =================================================================== --- trunk/faxpp/ChangeLog 2008-03-20 01:56:21 UTC (rev 47) +++ trunk/faxpp/ChangeLog 2008-03-20 02:33:24 UTC (rev 48) @@ -1,6 +1,27 @@ Faxpp: A small, fast XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. +version 0.3 (2008.03.20): + * Implemented the parsing of doctype declarations and internal + subsets. + * Implemented the parsing of external subsets (DTDs) and + external parsed entities, and a mechanism for resolving them. + * Implemented entity resolution and replacement. + * Implemented a base URI for the parser, which is used to + resolve external entities. + * Changed FAXPP_set_decode() so that if the user sets a decode + function, all other indications of encoding are ignored. + * Added methods to return a decode or encode function, given a + string defining the encoding. + * Added line and column numbers to attribute values. + * Fixed a bug that was causing all strings to be copied. + * Fixed column counting, and line counting for "\r\n" at a + buffer boundary. + * Fixed the handling of namespace URIs for "xml" and "xmlns", + and raised the correct errors for redefining them. + * Fixed the parser to always return a START_DOCUMENT_EVENT. + * Fixed various conformance and memory related bugs. 
+ version 0.2 (2007.08.20): * Implemented a test harness for the XML Conformance Test Suite * Added support for streaming input to the tokenizer and parser, Modified: trunk/faxpp/docs/header.html =================================================================== --- trunk/faxpp/docs/header.html 2008-03-20 01:56:21 UTC (rev 47) +++ trunk/faxpp/docs/header.html 2008-03-20 02:33:24 UTC (rev 48) @@ -1,6 +1,6 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"> -<title>Faxpp 0.2 Documentation</title> +<title>Faxpp 0.3 Documentation</title> <link href="doxygen.css" rel="stylesheet" type="text/css"> <link href="tabs.css" rel="stylesheet" type="text/css"> </head><body> Modified: trunk/faxpp/include/faxpp/parser.h =================================================================== --- trunk/faxpp/include/faxpp/parser.h 2008-03-20 01:56:21 UTC (rev 47) +++ trunk/faxpp/include/faxpp/parser.h 2008-03-20 02:33:24 UTC (rev 48) @@ -26,7 +26,7 @@ /** * \mainpage * - * Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. + * Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return strings in any encoding including UTF-8 and UTF-16. * * Faxpp is written by John Snelson, and is released under the terms of the Apache Licence v2. * @@ -39,7 +39,7 @@ * * \section Performance * - * Faxpp's main performance benefit comes from it's ability to not copy strings - instead + * Faxpp's main performance benefit comes from its ability to reduce and eliminate string copies - instead, * the FAXPP_Text structure will point directly to the tokenizer's buffer when possible. 
* This is possible when: * @@ -57,16 +57,17 @@ * * \section Downloads * - * \li Faxpp can be downloaded from here: http://sourceforge.net/project/showfiles.php?group_id=201903 - * \li Anonymous access to the subversion repository is also available. Instructions can be found here: http://sourceforge.net/svn/?group_id=201903 - * \li You can browse the subversion repository here: http://faxpp.svn.sourceforge.net/viewvc/faxpp/ + * \li Faxpp can be <a href="http://sourceforge.net/project/showfiles.php?group_id=201903">downloaded</a> from Sourceforge. + * \li <a href="http://sourceforge.net/svn/?group_id=201903">Anonymous access</a> to the + * <a href="http://faxpp.svn.sourceforge.net/viewvc/faxpp/">subversion repository</a> is also available. * * \section Support * - * \li There is a user mailing list, faxpp-user, which is the appropriate place to direct any questions or problems. You can join the mailing list here: - * https://lists.sourceforge.net/lists/listinfo/faxpp-user - * \li Archives for the mailing list are available here: http://sourceforge.net/mailarchive/forum.php?forum_name=faxpp-user - * \li If you have a bug to report, you may wish to enter it into our bug database, here: http://sourceforge.net/tracker/?atid=979420&group_id=201903 + * \li There is a <a href="http://sourceforge.net/mailarchive/forum.php?forum_name=faxpp-user">user mailing list</a>, + * <a href="https://lists.sourceforge.net/lists/listinfo/faxpp-user">faxpp-user</a>, which is the appropriate place to + * direct any questions or problems. + * \li If you have a bug to report, you may wish to enter it into our + * <a href="http://sourceforge.net/tracker/?atid=979420&group_id=201903">bug database</a>. 
* */ @@ -463,33 +464,7 @@ * * \param parser * - * \retval ELEMENT_NAME_MISMATCH - * \retval NO_URI_FOR_PREFIX - * \retval DUPLICATE_ATTRIBUTES - * \retval DOUBLE_DASH_IN_COMMENT - * \retval PREMATURE_END_OF_BUFFER - * \retval INVALID_START_OF_COMMENT - * \retval INVALID_CHAR_IN_START_ELEMENT - * \retval INVALID_CHAR_IN_ATTRIBUTE - * \retval INVALID_CHAR_IN_END_ELEMENT - * \retval NON_WHITESPACE_OUTSIDE_DOC_ELEMENT - * \retval BAD_ENCODING - * \retval UNSUPPORTED_ENCODING - * \retval ADDITIONAL_DOCUMENT_ELEMENT - * \retval INVALID_CHAR_IN_PI_NAME - * \retval INVALID_PI_NAME_OF_XML - * \retval INVALID_CHAR_IN_ELEMENT_NAME - * \retval INVALID_CHAR_IN_ATTRIBUTE_NAME - * \retval RESTRICTED_CHAR - * \retval INVALID_CHAR_IN_ENTITY_REFERENCE - * \retval INVALID_CHAR_IN_CHAR_REFERENCE - * \retval INVALID_CHAR_IN_XML_DECL - * \retval EXPECTING_EQUALS - * \retval EXPECTING_WHITESPACE - * \retval UNKNOWN_XML_VERSION - * \retval INVALID_ENCODING_VALUE - * \retval OUT_OF_MEMORY - * \retval NO_ERROR + * \return Any error that occurs * * \relatesalso FAXPP_Parser */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-20 01:56:14
|
Revision: 47 http://faxpp.svn.sourceforge.net/faxpp/?rev=47&view=rev Author: jpcs Date: 2008-03-19 18:56:21 -0700 (Wed, 19 Mar 2008) Log Message: ----------- Added recognition of parameter entities in entity and notation declarations. Raise errors for redeclaration of "xml" and "xmlns" namespaces. Validate the value of "xml:space" attributes. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/TODO trunk/faxpp/include/faxpp/error.h trunk/faxpp/src/doctype.c trunk/faxpp/src/entitydecl.c trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/src/system_public_states.h Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/Makefile.am 2008-03-20 01:56:21 UTC (rev 47) @@ -37,7 +37,8 @@ src/attlistdecl.c \ src/notationdecl.c \ src/entitydecl.c \ -src/conditional.c +src/conditional.c \ +src/system_public_states.h tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -46,13 +47,17 @@ parser_example_SOURCES = \ examples/parser_example.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h xmlconf_runner_LDADD = libfaxpp.la xmlconf_runner_SOURCES = \ tests/xmlconf_runner.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h EXTRA_DIST = \ docs/Doxyfile.api \ Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/Makefile.in 2008-03-20 01:56:21 UTC (rev 47) @@ -260,7 +260,8 @@ src/attlistdecl.c \ src/notationdecl.c 
\ src/entitydecl.c \ -src/conditional.c +src/conditional.c \ +src/system_public_states.h tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -268,13 +269,17 @@ parser_example_SOURCES = \ examples/parser_example.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h xmlconf_runner_LDADD = libfaxpp.la xmlconf_runner_SOURCES = \ tests/xmlconf_runner.c \ examples/entity_resolver.c \ -examples/output_event.c +examples/entity_resolver.h \ +examples/output_event.c \ +examples/output_event.h EXTRA_DIST = \ docs/Doxyfile.api \ Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/TODO 2008-03-20 01:56:21 UTC (rev 47) @@ -1,17 +1,11 @@ Small tasks ----------- -Don't accept Namespace 1.1 undefines in XML 1.0 mode Normalize line endings in element character content / PI values / comment values Accept XML 1.1 line endings as whitespace -Handle "xml" namespace properly -xml:space value checking -Error for redefining "xml" namespace -Error for defining "xmlns" namespace -Parse element decls correctly -Parse parameter entities in markup correctly Large tasks ----------- +Attribute default values DTD validation Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/include/faxpp/error.h 2008-03-20 01:56:21 UTC (rev 47) @@ -75,7 +75,9 @@ ELEMENT_NAME_MISMATCH, NO_URI_FOR_PREFIX, - DUPLICATE_ATTRIBUTES + DUPLICATE_ATTRIBUTES, + INVALID_NAMESPACE_DECLARATION, + INVALID_XMLSPACE_VALUE } FAXPP_Error; /** Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/doctype.c 2008-03-20 
01:56:21 UTC (rev 47) @@ -198,12 +198,10 @@ WHITESPACE: break; case 'S': - env->stored_state = doctype_internal_subset_start_state; - env->state = system_id_initial_state1; + env->state = doctype_system_id_initial_state1; break; case 'P': - env->stored_state = doctype_internal_subset_start_state; - env->state = public_id_initial_state1; + env->state = doctype_public_id_initial_state1; break; case '[': env->state = internal_subset_state; @@ -223,292 +221,15 @@ return NO_ERROR; } -SINGLE_CHAR_STATE(system_id_initial_state1, 'Y', 0, system_id_initial_state2, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state2, 'S', 0, system_id_initial_state3, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state3, 'T', 0, system_id_initial_state4, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state4, 'E', 0, system_id_initial_state5, INVALID_SYSTEM_ID) -SINGLE_CHAR_STATE(system_id_initial_state5, 'M', 0, system_id_ws_state, INVALID_SYSTEM_ID) +#define PREFIX(name) doctype_ ## name +#define END_STATE doctype_internal_subset_start_state -FAXPP_Error -system_id_ws_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); +#include "system_public_states.h" - switch(env->current_char) { - WHITESPACE: - env->state = system_literal_start_state; - next_char(env); - break; - default: - env->state = system_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} +#undef END_STATE +#undef PREFIX FAXPP_Error -system_literal_start_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - return NO_ERROR; - case '"': - env->state = system_literal_quot_state; - break; - case '\'': - env->state = system_literal_apos_state; - break; - default: - next_char(env); - return EXPECTING_SYSTEM_LITERAL; - } - next_char(env); - token_start_position(env); - return NO_ERROR; -} - -FAXPP_Error -system_literal_apos_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { 
- case '\'': - retrieve_state(env); - token_end_position(env); - report_token(SYSTEM_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - LINE_ENDINGS - default: - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { - next_char(env); - return RESTRICTED_CHAR; - } - break; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -system_literal_quot_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '"': - retrieve_state(env); - token_end_position(env); - report_token(SYSTEM_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - LINE_ENDINGS - default: - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { - next_char(env); - return RESTRICTED_CHAR; - } - break; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -SINGLE_CHAR_STATE(public_id_initial_state1, 'U', 0, public_id_initial_state2, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state2, 'B', 0, public_id_initial_state3, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state3, 'L', 0, public_id_initial_state4, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state4, 'I', 0, public_id_initial_state5, INVALID_PUBLIC_ID) -SINGLE_CHAR_STATE(public_id_initial_state5, 'C', 0, public_id_ws_state, INVALID_PUBLIC_ID) - -FAXPP_Error -public_id_ws_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - env->state = pubid_literal_start_state; - next_char(env); - break; - default: - env->state = pubid_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_start_state(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - return NO_ERROR; - case '"': - env->state = pubid_literal_quot_state; - break; - case '\'': - env->state = pubid_literal_apos_state; - break; - default: - next_char(env); - 
return EXPECTING_PUBID_LITERAL; - } - next_char(env); - token_start_position(env); - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_apos_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '\'': - env->state = public_id_ws_state2; - token_end_position(env); - report_token(PUBID_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] - LINE_ENDINGS - // A-Z - case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: - // a-z - case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: - // 0-9 - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': - case '9': - case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': - case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': - case '%': - // Valid PubidChar - break; - default: - next_char(env); - return INVALID_CHAR_IN_PUBID_LITERAL; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -pubid_literal_quot_state(FAXPP_TokenizerEnv *env) -{ - while(1) { - read_char(env); - - switch(env->current_char) { - case '"': - env->state = public_id_ws_state2; - token_end_position(env); - report_token(PUBID_LITERAL_TOKEN, env); - next_char(env); - return NO_ERROR; - // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] - LINE_ENDINGS - // A-Z - case 0x41: case 0x42: case 0x43: case 
0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: - // a-z - case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: - // 0-9 - case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': - case '9': - case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': - case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': - case '%': case '\'': - // Valid PubidChar - break; - default: - next_char(env); - return INVALID_CHAR_IN_PUBID_LITERAL; - } - next_char(env); - } - - // Never happens - return NO_ERROR; -} - -FAXPP_Error -public_id_ws_state2(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - env->state = public_id_ws_state3; - next_char(env); - break; - case '>': - if(env->stored_state == notationdecl_end_state) { - // Notation decls can skip the system literal - retrieve_state(env); - return NO_ERROR; - } - // Fall through - default: - env->state = system_literal_start_state; - return EXPECTING_WHITESPACE; - } - return NO_ERROR; -} - -FAXPP_Error -public_id_ws_state3(FAXPP_TokenizerEnv *env) -{ - read_char(env); - - switch(env->current_char) { - WHITESPACE: - next_char(env); - break; - case '>': - if(env->stored_state == notationdecl_end_state) { - // Notation decls can skip the system literal - retrieve_state(env); - return NO_ERROR; - } - // Fall through - default: - env->state = system_literal_start_state; - break; - } - return NO_ERROR; -} - -FAXPP_Error 
doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/entitydecl.c =================================================================== --- trunk/faxpp/src/entitydecl.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/entitydecl.c 2008-03-20 01:56:21 UTC (rev 47) @@ -111,12 +111,10 @@ token_start_position(env); return NO_ERROR; case 'S': - env->stored_state = entitydecl_ws_state; - env->state = system_id_initial_state1; + env->state = entitydecl_system_id_initial_state1; break; case 'P': - env->stored_state = entitydecl_ws_state; - env->state = public_id_initial_state1; + env->state = entitydecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -127,6 +125,16 @@ return NO_ERROR; } +#define PREFIX(name) entitydecl_ ## name +#define END_STATE entitydecl_ws_state +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef END_STATE +#undef PREFIX + FAXPP_Error entitydecl_value_apos_state(FAXPP_TokenizerEnv *env) { @@ -422,12 +430,10 @@ token_start_position(env); return NO_ERROR; case 'S': - env->stored_state = paramentitydecl_end_state; - env->state = system_id_initial_state1; + env->state = paramentitydecl_system_id_initial_state1; break; case 'P': - env->stored_state = paramentitydecl_end_state; - env->state = public_id_initial_state1; + env->state = paramentitydecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -438,6 +444,16 @@ return NO_ERROR; } +#define PREFIX(name) paramentitydecl_ ## name +#define END_STATE paramentitydecl_end_state +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef END_STATE +#undef PREFIX + FAXPP_Error paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env) { Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/error.c 
2008-03-20 01:56:21 UTC (rev 47) @@ -137,6 +137,10 @@ return "IMPROPER_NESTING_OF_ENTITY"; case PARAMETER_ENTITY_IN_INTERNAL_SUBSET: return "PARAMETER_ENTITY_IN_INTERNAL_SUBSET"; + case INVALID_NAMESPACE_DECLARATION: + return "INVALID_NAMESPACE_DECLARATION"; + case INVALID_XMLSPACE_VALUE: + return "INVALID_XMLSPACE_VALUE"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/notationdecl.c 2008-03-20 01:56:21 UTC (rev 47) @@ -97,12 +97,10 @@ switch(env->current_char) { case 'S': - env->stored_state = notationdecl_end_state; - env->state = system_id_initial_state1; + env->state = notationdecl_system_id_initial_state1; break; case 'P': - env->stored_state = notationdecl_end_state; - env->state = public_id_initial_state1; + env->state = notationdecl_public_id_initial_state1; break; LINE_ENDINGS default: @@ -113,6 +111,18 @@ return NO_ERROR; } +#define PREFIX(name) notationdecl_ ## name +#define END_STATE notationdecl_end_state +#define SKIP_SYSTEM_LITERAL +#define ALLOW_PARAMETER_ENTITIES + +#include "system_public_states.h" + +#undef ALLOW_PARAMETER_ENTITIES +#undef SKIP_SYSTEM_LITERAL +#undef END_STATE +#undef PREFIX + FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env) { Added: trunk/faxpp/src/system_public_states.h =================================================================== --- trunk/faxpp/src/system_public_states.h (rev 0) +++ trunk/faxpp/src/system_public_states.h 2008-03-20 01:56:21 UTC (rev 47) @@ -0,0 +1,392 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file needs to have a number of macros defined before it is included +// PREFIX(name) +// END_STATE +// SKIP_SYSTEM_LITERAL +// ALLOW_PARAMETER_ENTITIES + +#define SP_SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state1), 'Y', 0, PREFIX(system_id_initial_state2), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state2), 'S', 0, PREFIX(system_id_initial_state3), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state3), 'T', 0, PREFIX(system_id_initial_state4), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state4), 'E', 0, PREFIX(system_id_initial_state5), INVALID_SYSTEM_ID) +SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state5), 'M', 0, PREFIX(system_id_ws_state), INVALID_SYSTEM_ID) + +FAXPP_Error +PREFIX(system_id_ws_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(system_literal_start_state); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); 
+ if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + default: + env->state = PREFIX(system_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_start_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + return NO_ERROR; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + case '"': + env->state = PREFIX(system_literal_quot_state); + break; + case '\'': + env->state = PREFIX(system_literal_apos_state); + break; + default: + next_char(env); + return EXPECTING_SYSTEM_LITERAL; + } + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_apos_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = END_STATE; + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + case '#': + next_char(env); + return INVALID_SYSTEM_ID; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(system_literal_quot_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '"': + env->state = END_STATE; + token_end_position(env); + report_token(SYSTEM_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + case '#': + next_char(env); + return INVALID_SYSTEM_ID; + 
LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state1), 'U', 0, PREFIX(public_id_initial_state2), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state2), 'B', 0, PREFIX(public_id_initial_state3), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state3), 'L', 0, PREFIX(public_id_initial_state4), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state4), 'I', 0, PREFIX(public_id_initial_state5), INVALID_PUBLIC_ID) +SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state5), 'C', 0, PREFIX(public_id_ws_state), INVALID_PUBLIC_ID) + +FAXPP_Error +PREFIX(public_id_ws_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(pubid_literal_start_state); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + default: + env->state = PREFIX(pubid_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_start_state)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + return NO_ERROR; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif + case '"': + 
env->state = PREFIX(pubid_literal_quot_state); + break; + case '\'': + env->state = PREFIX(pubid_literal_apos_state); + break; + default: + next_char(env); + return EXPECTING_PUBID_LITERAL; + } + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_apos_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = PREFIX(public_id_ws_state2); + token_end_position(env); + report_token(PUBID_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + LINE_ENDINGS + // A-Z + case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: + // a-z + case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': + case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': + case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': + case '%': + // Valid PubidChar + break; + default: + next_char(env); + return INVALID_CHAR_IN_PUBID_LITERAL; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(pubid_literal_quot_state)(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + case '"': + env->state = PREFIX(public_id_ws_state2); + token_end_position(env); + 
report_token(PUBID_LITERAL_TOKEN, env); + next_char(env); + return NO_ERROR; + // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + LINE_ENDINGS + // A-Z + case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: + // a-z + case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': + case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': + case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': + case '%': case '\'': + // Valid PubidChar + break; + default: + next_char(env); + return INVALID_CHAR_IN_PUBID_LITERAL; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +PREFIX(public_id_ws_state2)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = PREFIX(public_id_ws_state3); + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif +#ifdef SKIP_SYSTEM_LITERAL + case '>': + // Notation decls can skip the system literal + env->state = END_STATE; + return NO_ERROR; +#endif + default: + env->state = 
PREFIX(system_literal_start_state); + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +PREFIX(public_id_ws_state3)(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; +#ifdef ALLOW_PARAMETER_ENTITIES + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; +#endif +#ifdef SKIP_SYSTEM_LITERAL + case '>': + // Notation decls can skip the system literal + env->state = END_STATE; + return NO_ERROR; +#endif + default: + env->state = PREFIX(system_literal_start_state); + break; + } + return NO_ERROR; +} + +#undef SP_SINGLE_CHAR_STATE Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-20 01:56:21 UTC (rev 47) @@ -450,47 +450,47 @@ else if(state == external_subset_decl_state) return "external_subset_decl_state"; - else if(state == system_id_initial_state1) - return "system_id_initial_state1"; - else if(state == system_id_initial_state2) - return "system_id_initial_state2"; - else if(state == system_id_initial_state3) - return "system_id_initial_state3"; - else if(state == system_id_initial_state4) - return "system_id_initial_state4"; - else if(state == system_id_initial_state5) - return "system_id_initial_state5"; - else if(state == system_id_ws_state) - return "system_id_ws_state"; - else if(state == system_literal_start_state) - return "system_literal_start_state"; - else if(state == system_literal_apos_state) - return "system_literal_apos_state"; - else if(state == system_literal_quot_state) - return "system_literal_quot_state"; + else if(state == 
doctype_system_id_initial_state1) + return "doctype_system_id_initial_state1"; + else if(state == doctype_system_id_initial_state2) + return "doctype_system_id_initial_state2"; + else if(state == doctype_system_id_initial_state3) + return "doctype_system_id_initial_state3"; + else if(state == doctype_system_id_initial_state4) + return "doctype_system_id_initial_state4"; + else if(state == doctype_system_id_initial_state5) + return "doctype_system_id_initial_state5"; + else if(state == doctype_system_id_ws_state) + return "doctype_system_id_ws_state"; + else if(state == doctype_system_literal_start_state) + return "doctype_system_literal_start_state"; + else if(state == doctype_system_literal_apos_state) + return "doctype_system_literal_apos_state"; + else if(state == doctype_system_literal_quot_state) + return "doctype_system_literal_quot_state"; - else if(state == public_id_initial_state1) - return "public_id_initial_state1"; - else if(state == public_id_initial_state2) - return "public_id_initial_state2"; - else if(state == public_id_initial_state3) - return "public_id_initial_state3"; - else if(state == public_id_initial_state4) - return "public_id_initial_state4"; - else if(state == public_id_initial_state5) - return "public_id_initial_state5"; - else if(state == public_id_ws_state) - return "public_id_ws_state"; - else if(state == pubid_literal_start_state) - return "pubid_literal_start_state"; - else if(state == pubid_literal_apos_state) - return "pubid_literal_apos_state"; - else if(state == pubid_literal_quot_state) - return "pubid_literal_quot_state"; - else if(state == public_id_ws_state2) - return "public_id_ws_state2"; - else if(state == public_id_ws_state3) - return "public_id_ws_state3"; + else if(state == doctype_public_id_initial_state1) + return "doctype_public_id_initial_state1"; + else if(state == doctype_public_id_initial_state2) + return "doctype_public_id_initial_state2"; + else if(state == doctype_public_id_initial_state3) + return 
"doctype_public_id_initial_state3"; + else if(state == doctype_public_id_initial_state4) + return "doctype_public_id_initial_state4"; + else if(state == doctype_public_id_initial_state5) + return "doctype_public_id_initial_state5"; + else if(state == doctype_public_id_ws_state) + return "doctype_public_id_ws_state"; + else if(state == doctype_pubid_literal_start_state) + return "doctype_pubid_literal_start_state"; + else if(state == doctype_pubid_literal_apos_state) + return "doctype_pubid_literal_apos_state"; + else if(state == doctype_pubid_literal_quot_state) + return "doctype_pubid_literal_quot_state"; + else if(state == doctype_public_id_ws_state2) + return "doctype_public_id_ws_state2"; + else if(state == doctype_public_id_ws_state3) + return "doctype_public_id_ws_state3"; else if(state == elementdecl_or_entitydecl_state) return "elementdecl_or_entitydecl_state"; @@ -797,6 +797,47 @@ else if(state == notationdecl_end_state) return "notationdecl_end_state"; + else if(state == notationdecl_system_id_initial_state1) + return "notationdecl_system_id_initial_state1"; + else if(state == notationdecl_system_id_initial_state2) + return "notationdecl_system_id_initial_state2"; + else if(state == notationdecl_system_id_initial_state3) + return "notationdecl_system_id_initial_state3"; + else if(state == notationdecl_system_id_initial_state4) + return "notationdecl_system_id_initial_state4"; + else if(state == notationdecl_system_id_initial_state5) + return "notationdecl_system_id_initial_state5"; + else if(state == notationdecl_system_id_ws_state) + return "notationdecl_system_id_ws_state"; + else if(state == notationdecl_system_literal_start_state) + return "notationdecl_system_literal_start_state"; + else if(state == notationdecl_system_literal_apos_state) + return "notationdecl_system_literal_apos_state"; + else if(state == notationdecl_system_literal_quot_state) + return "notationdecl_system_literal_quot_state"; + else if(state == 
notationdecl_public_id_initial_state1) + return "notationdecl_public_id_initial_state1"; + else if(state == notationdecl_public_id_initial_state2) + return "notationdecl_public_id_initial_state2"; + else if(state == notationdecl_public_id_initial_state3) + return "notationdecl_public_id_initial_state3"; + else if(state == notationdecl_public_id_initial_state4) + return "notationdecl_public_id_initial_state4"; + else if(state == notationdecl_public_id_initial_state5) + return "notationdecl_public_id_initial_state5"; + else if(state == notationdecl_public_id_ws_state) + return "notationdecl_public_id_ws_state"; + else if(state == notationdecl_pubid_literal_start_state) + return "notationdecl_pubid_literal_start_state"; + else if(state == notationdecl_pubid_literal_apos_state) + return "notationdecl_pubid_literal_apos_state"; + else if(state == notationdecl_pubid_literal_quot_state) + return "notationdecl_pubid_literal_quot_state"; + else if(state == notationdecl_public_id_ws_state2) + return "notationdecl_public_id_ws_state2"; + else if(state == notationdecl_public_id_ws_state3) + return "notationdecl_public_id_ws_state3"; + else if(state == entitydecl_initial_state1) return "entitydecl_initial_state1"; else if(state == entitydecl_initial_state1) @@ -837,6 +878,47 @@ else if(state == entitydecl_end_state) return "entitydecl_end_state"; + else if(state == entitydecl_system_id_initial_state1) + return "entitydecl_system_id_initial_state1"; + else if(state == entitydecl_system_id_initial_state2) + return "entitydecl_system_id_initial_state2"; + else if(state == entitydecl_system_id_initial_state3) + return "entitydecl_system_id_initial_state3"; + else if(state == entitydecl_system_id_initial_state4) + return "entitydecl_system_id_initial_state4"; + else if(state == entitydecl_system_id_initial_state5) + return "entitydecl_system_id_initial_state5"; + else if(state == entitydecl_system_id_ws_state) + return "entitydecl_system_id_ws_state"; + else if(state == 
entitydecl_system_literal_start_state) + return "entitydecl_system_literal_start_state"; + else if(state == entitydecl_system_literal_apos_state) + return "entitydecl_system_literal_apos_state"; + else if(state == entitydecl_system_literal_quot_state) + return "entitydecl_system_literal_quot_state"; + else if(state == entitydecl_public_id_initial_state1) + return "entitydecl_public_id_initial_state1"; + else if(state == entitydecl_public_id_initial_state2) + return "entitydecl_public_id_initial_state2"; + else if(state == entitydecl_public_id_initial_state3) + return "entitydecl_public_id_initial_state3"; + else if(state == entitydecl_public_id_initial_state4) + return "entitydecl_public_id_initial_state4"; + else if(state == entitydecl_public_id_initial_state5) + return "entitydecl_public_id_initial_state5"; + else if(state == entitydecl_public_id_ws_state) + return "entitydecl_public_id_ws_state"; + else if(state == entitydecl_pubid_literal_start_state) + return "entitydecl_pubid_literal_start_state"; + else if(state == entitydecl_pubid_literal_apos_state) + return "entitydecl_pubid_literal_apos_state"; + else if(state == entitydecl_pubid_literal_quot_state) + return "entitydecl_pubid_literal_quot_state"; + else if(state == entitydecl_public_id_ws_state2) + return "entitydecl_public_id_ws_state2"; + else if(state == entitydecl_public_id_ws_state3) + return "entitydecl_public_id_ws_state3"; + else if(state == paramentitydecl_name_state1) return "paramentitydecl_name_state1"; else if(state == paramentitydecl_name_state2) @@ -850,6 +932,47 @@ else if(state == paramentitydecl_end_state) return "paramentitydecl_end_state"; + else if(state == paramentitydecl_system_id_initial_state1) + return "paramentitydecl_system_id_initial_state1"; + else if(state == paramentitydecl_system_id_initial_state2) + return "paramentitydecl_system_id_initial_state2"; + else if(state == paramentitydecl_system_id_initial_state3) + return "paramentitydecl_system_id_initial_state3"; + else 
if(state == paramentitydecl_system_id_initial_state4) + return "paramentitydecl_system_id_initial_state4"; + else if(state == paramentitydecl_system_id_initial_state5) + return "paramentitydecl_system_id_initial_state5"; + else if(state == paramentitydecl_system_id_ws_state) + return "paramentitydecl_system_id_ws_state"; + else if(state == paramentitydecl_system_literal_start_state) + return "paramentitydecl_system_literal_start_state"; + else if(state == paramentitydecl_system_literal_apos_state) + return "paramentitydecl_system_literal_apos_state"; + else if(state == paramentitydecl_system_literal_quot_state) + return "paramentitydecl_system_literal_quot_state"; + else if(state == paramentitydecl_public_id_initial_state1) + return "paramentitydecl_public_id_initial_state1"; + else if(state == paramentitydecl_public_id_initial_state2) + return "paramentitydecl_public_id_initial_state2"; + else if(state == paramentitydecl_public_id_initial_state3) + return "paramentitydecl_public_id_initial_state3"; + else if(state == paramentitydecl_public_id_initial_state4) + return "paramentitydecl_public_id_initial_state4"; + else if(state == paramentitydecl_public_id_initial_state5) + return "paramentitydecl_public_id_initial_state5"; + else if(state == paramentitydecl_public_id_ws_state) + return "paramentitydecl_public_id_ws_state"; + else if(state == paramentitydecl_pubid_literal_start_state) + return "paramentitydecl_pubid_literal_start_state"; + else if(state == paramentitydecl_pubid_literal_apos_state) + return "paramentitydecl_pubid_literal_apos_state"; + else if(state == paramentitydecl_pubid_literal_quot_state) + return "paramentitydecl_pubid_literal_quot_state"; + else if(state == paramentitydecl_public_id_ws_state2) + return "paramentitydecl_public_id_ws_state2"; + else if(state == paramentitydecl_public_id_ws_state3) + return "paramentitydecl_public_id_ws_state3"; + else if(state == conditional_ws_state) return "conditional_ws_state"; else if(state == 
conditional_state1) Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-20 01:56:21 UTC (rev 47) @@ -274,27 +274,27 @@ FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state3(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state4(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error system_id_ws_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_start_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_apos_state(FAXPP_TokenizerEnv *env); -FAXPP_Error system_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_system_literal_quot_state(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state1(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state3(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state4(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error 
public_id_ws_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_start_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); -FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_ws_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error public_id_ws_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_public_id_ws_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); @@ -449,6 +449,28 @@ FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error 
notationdecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error notationdecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env); @@ -469,6 +491,28 @@ FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + 
+FAXPP_Error entitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env); @@ -476,6 +520,28 @@ FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error paramentitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error 
paramentitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env); + FAXPP_Error conditional_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env); Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/src/xml_parser.c 2008-03-20 01:56:21 UTC (rev 47) @@ -405,7 +405,7 @@ { uri->ptr = 0; uri->len = 0; - return p_find_ns_info(parser, prefix, uri); + return p_find_ns_info_impl(parser, prefix, uri); } unsigned int FAXPP_get_nesting_level(const FAXPP_Parser *parser) @@ -677,17 +677,6 @@ } \ } -#define p_copy_text_from_attr_value(text, attrval, env, buffer) \ -{ \ - if((attrval)->next == 0) { \ - p_copy_text_from_event((text), &(attrval)->value, (env), (buffer)); \ - } \ - else { \ - FAXPP_Error err = p_normalize_attr_value((text), (buffer), (attrval), (env)); \ - if(err != NO_ERROR) return err; \ - } \ -} - FAXPP_Error p_normalize_attr_value(FAXPP_Text *text, FAXPP_Buffer *buffer, const FAXPP_AttrValue *value, const FAXPP_ParserEnv *env) { FAXPP_Error err; @@ -1032,6 +1021,30 @@ return text_ptr == text_end; } +static int p_equals(const char *str, 
FAXPP_EncodeFunction encode, const FAXPP_Text *text) +{ + // No encoding represents a character with as many as 10 bytes + uint8_t encode_buffer[10]; + unsigned int encode_len; + + void *text_ptr = text->ptr; + void *text_end = text_ptr + text->len; + + while(*str != 0) { + if(text_ptr >= text_end) return 0; + + encode_len = encode(encode_buffer, encode_buffer + sizeof(encode_buffer), *str); + if((text_end - text_ptr) < encode_len || memcmp(encode_buffer, text_ptr, encode_len) != 0) { + return 0; + } + + text_ptr += encode_len; + ++str; + } + + return text_ptr == text_end; +} + static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env) { FAXPP_NextEvent next; @@ -1048,7 +1061,7 @@ p_copy_text_from_token(&env->event.version, env, /*useTokenBuffer*/0); p_set_event_location_from_token(env); - if(p_case_insensitive_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) { + if(p_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) { if(env->xml_version == XML_VERSION_NOT_KNOWN) { env->xml_version = XML_VERSION_1_1; } @@ -1069,7 +1082,7 @@ case XML_DECL_STANDALONE_TOKEN: p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0); - if(p_case_insensitive_equals("YES", env->tenv->transcoder.encode, &env->event.standalone)) { + if(p_equals("yes", env->tenv->transcoder.encode, &env->event.standalone)) { env->standalone = 1; } break; @@ -1605,6 +1618,7 @@ err = p_parse_entity(env, ent, ATTRIBUTE_VALUE_ENTITY); if(err) goto error; + return NO_ERROR; } break; case PE_REFERENCE_TOKEN: @@ -1639,6 +1653,7 @@ p_set_text_from_text(&env->event.public_id, &bkup_public); if(err) goto error; + return NO_ERROR; } break; case PE_REFERENCE_IN_MARKUP_TOKEN: @@ -1660,7 +1675,7 @@ p_set_text_from_text(&env->event.public_id, &bkup_public); if(err) goto error; - break; + return NO_ERROR; case ELEMENTDECL_LPAR_TOKEN: cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec)); @@ -2045,6 +2060,7 @@ err = p_parse_entity(env, ent, 
ATTRIBUTE_VALUE_ENTITY); if(err) goto error; + return NO_ERROR; } else { p_reset_event(env); @@ -2119,10 +2135,36 @@ return err; } +static const char *xml_prefix = "xml"; +static const char *xmlns_prefix = "xmlns"; +static const char *xml_uri = "http://www.w3.org/XML/1998/namespace"; +static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/"; + static FAXPP_Error p_add_ns_info(FAXPP_ParserEnv *env, const FAXPP_Attribute *attr) { - FAXPP_NamespaceInfo *nsinfo = env->namespace_pool; + FAXPP_NamespaceInfo *nsinfo; + // Check for invalid "xml" or "xmlns" namespace declarations + if(attr->prefix.len != 0) { + if(p_equals(xmlns_prefix, env->tenv->transcoder.encode, &attr->name)) + return INVALID_NAMESPACE_DECLARATION; + if(p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name) && + !p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + } + + if((attr->prefix.len == 0 || !p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name)) && + p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + + if(p_equals(xmlns_uri, env->tenv->transcoder.encode, &attr->value.value)) + return INVALID_NAMESPACE_DECLARATION; + + if(env->tenv->xml_char == CHAR10 && attr->prefix.len != 0 && attr->value.value.len == 0) + return INVALID_NAMESPACE_DECLARATION; + + // Add the namespace binding + nsinfo = env->namespace_pool; if(nsinfo == 0) { nsinfo = (FAXPP_NamespaceInfo*)malloc(sizeof(FAXPP_NamespaceInfo)); if(!nsinfo) return OUT_OF_MEMORY; @@ -2136,7 +2178,7 @@ env->namespace_stack = nsinfo; env->element_info_stack->ns = nsinfo; - p_copy_text_from_attr_value(&nsinfo->uri, &attr->value, env, &env->element_info_stack->buffer); + p_copy_text_from_event(&nsinfo->uri, &attr->value.value, env, &env->element_info_stack->buffer); if(attr->prefix.len != 0) { p_copy_text_from_event(&nsinfo->prefix, &attr->name, env, &env->element_info_stack->buffer); @@ -2166,6 +2208,10 @@ } 
// The prefix "xml" is always bound to the namespace URI "http://www.w3.org/XML/1998/namespace" + if(p_equals(xml_prefix, env->tenv->transcoder.encode, prefix)) { + p_copy_text_from_str(uri, &((FAXPP_ParserEnv*)env)->event_buffer, (FAXPP_ParserEnv*)env, xml_uri); + return NO_ERROR; + } return NO_URI_FOR_PREFIX; } @@ -2245,9 +2291,6 @@ env->element_info_pool = einfo; } -static const char *xml_uri = "http://www.w3.org/XML/1998/namespace"; -static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/"; - static FAXPP_Error wf_next_event(FAXPP_ParserEnv *env) { int i, j; @@ -2271,8 +2314,9 @@ attr = &env->event.attrs[i]; /* Normalize the attribute values if required */ - if(env->tenv->normalize_attrs && - (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0)) { + if(attr->xmlns_attr || attr->xml_attr || + (env->tenv->normalize_attrs && + (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0))) { err = p_normalize_attr_value(&tmpText, &env->event_buffer, &attr->value, env); if(err != 0) return err; @@ -2287,7 +2331,7 @@ /* Check for namespace attributes */ if(attr->xmlns_attr) { err = p_add_ns_info(env, attr); - if(err != 0) { + if(err) { set_err_info_from_attr(env, attr); return err; } @@ -2313,6 +2357,13 @@ } else if(attr->xml_attr) { p_copy_text_from_str(&attr->uri, &env->event_buffer, env, xml_uri); + + if(p_equals("space", env->tenv->transcoder.encode, &attr->name) && + !p_equals("preserve", env->tenv->transcoder.encode, &attr->value.value) && + !p_equals("default", env->tenv->transcoder.encode, &attr->value.value)) { + set_err_info_from_attr(env, attr); + return INVALID_XMLSPACE_VALUE; + } } else if(attr->prefix.len != 0) { err = p_find_ns_info(env, &attr->prefix, &attr->uri); Modified: trunk/faxpp/tests/xmlconf_runner.c =================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2008-03-18 13:35:36 UTC (rev 46) +++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-20 01:56:21 UTC (rev 47) @@ -192,10 
+192,15 @@ else if(text_equal(event->name, "TEST")) { // TBD Check output - jpcs -/* if(find_attribute(event, "OUTPUT")) { */ +/* attr = find_attribute(event, "OUTPUT"); */ +/* if(attr) { */ +/* calculateBase(base_buffer, &attr->value, file_buffer); */ + /* printf("^"); */ +/* printf("\n%s\n", file_buffer); */ /* fflush(stdout); */ /* ++test_skips; */ +/* exit(-1); */ /* break; */ /* } */ @@ -213,11 +218,12 @@ break; } + attr = find_attribute(event, "TYPE"); + // Skip "error" type tests at the moment - since they // probably need detailed inspection to see which ones // ought to pass or fail // TBD enable these tests - jpcs - attr = find_attribute(event, "TYPE"); if(text_equal(attr->value.value, "error")) { printf("^"); fflush(stdout); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-18 13:36:22
|
Revision: 46 http://faxpp.svn.sourceforge.net/faxpp/?rev=46&view=rev Author: jpcs Date: 2008-03-18 06:35:36 -0700 (Tue, 18 Mar 2008) Log Message: ----------- Fixed various bugs shown up by the XML conformance tests. Modified Paths: -------------- trunk/faxpp/src/reference.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Modified: trunk/faxpp/src/reference.c =================================================================== --- trunk/faxpp/src/reference.c 2008-03-17 10:41:03 UTC (rev 45) +++ trunk/faxpp/src/reference.c 2008-03-18 13:35:36 UTC (rev 46) @@ -334,6 +334,25 @@ FAXPP_Error parameter_entity_reference_state(FAXPP_TokenizerEnv *env) { + read_char(env); + + env->state = parameter_entity_reference_state2; + + switch(env->current_char) { + LINE_ENDINGS + default: + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITY_REFERENCE; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +parameter_entity_reference_state2(FAXPP_TokenizerEnv *env) +{ while(1) { read_char(env); @@ -361,6 +380,25 @@ FAXPP_Error parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env) { + read_char(env); + + env->state = parameter_entity_reference_in_markup_state2; + + switch(env->current_char) { + LINE_ENDINGS + default: + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITY_REFERENCE; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +parameter_entity_reference_in_markup_state2(FAXPP_TokenizerEnv *env) +{ while(1) { read_char(env); Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-17 10:41:03 UTC (rev 45) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-18 13:35:36 UTC (rev 46) @@ -256,8 +256,12 @@ return "entity_reference_state"; else if(state == 
parameter_entity_reference_state) return "parameter_entity_reference_state"; + else if(state == parameter_entity_reference_state2) + return "parameter_entity_reference_state2"; else if(state == parameter_entity_reference_in_markup_state) return "parameter_entity_reference_in_markup_state"; + else if(state == parameter_entity_reference_in_markup_state2) + return "parameter_entity_reference_in_markup_state2"; else if(state == char_reference_state) return "char_reference_state"; else if(state == dec_char_reference_state) Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-17 10:41:03 UTC (rev 45) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-18 13:35:36 UTC (rev 46) @@ -172,7 +172,9 @@ FAXPP_Error quot_entity_reference_state4(FAXPP_TokenizerEnv *env); FAXPP_Error entity_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error parameter_entity_reference_state(FAXPP_TokenizerEnv *env); +FAXPP_Error parameter_entity_reference_state2(FAXPP_TokenizerEnv *env); FAXPP_Error parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env); +FAXPP_Error parameter_entity_reference_in_markup_state2(FAXPP_TokenizerEnv *env); FAXPP_Error char_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error dec_char_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error hex_char_reference_state1(FAXPP_TokenizerEnv *env); Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-17 10:41:03 UTC (rev 45) +++ trunk/faxpp/src/xml_parser.c 2008-03-18 13:35:36 UTC (rev 46) @@ -272,6 +272,9 @@ env->current_attlist = 0; env->current_notation = 0; + env->standalone = 0; + env->xml_version = XML_VERSION_NOT_KNOWN; + // Put the element info objects back in the pool while(env->element_info_stack) { el = env->element_info_stack; @@ -1044,12 +1047,31 @@ p_reset_event(env); 
p_copy_text_from_token(&env->event.version, env, /*useTokenBuffer*/0); p_set_event_location_from_token(env); + + if(p_case_insensitive_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) { + if(env->xml_version == XML_VERSION_NOT_KNOWN) { + env->xml_version = XML_VERSION_1_1; + } + else if(env->xml_version == XML_VERSION_1_0) { + err = UNKNOWN_XML_VERSION; + goto error; + } + } + else { + if(env->xml_version == XML_VERSION_NOT_KNOWN) { + env->xml_version = XML_VERSION_1_0; + } + } break; case XML_DECL_ENCODING_TOKEN: p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0); break; case XML_DECL_STANDALONE_TOKEN: p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0); + + if(p_case_insensitive_equals("YES", env->tenv->transcoder.encode, &env->event.standalone)) { + env->standalone = 1; + } break; case XML_DECL_END_TOKEN: env->next_event = nc_unsupported_encoding_next_event; @@ -1116,6 +1138,7 @@ default: env->tenv->buffered_token = 1; p_reset_event(env); + if(env->tenv->external_subset || env->tenv->external_in_markup_entity) { // TBD event for start of external subset - jpcs env->next_event = nc_dtd_next_event; @@ -1128,6 +1151,11 @@ env->event.type = START_DOCUMENT_EVENT; env->next_event = env->main_next_event; } + + if(env->xml_version == XML_VERSION_NOT_KNOWN) { + env->xml_version = XML_VERSION_1_0; + } + return NO_ERROR; } } @@ -1163,6 +1191,8 @@ p_force_copy_text_from_token(&ent->name, env, &env->entity_buffer); p_set_location_from_token(ent, env); + ent->from_internal_subset = env->tenv->internal_subset; + return NO_ERROR; } @@ -1196,6 +1226,8 @@ return list; } +static FAXPP_Error p_parse_entity_impl(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state, FAXPP_EntityInfo **initial_entity); + static FAXPP_Error p_parse_internal_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state, FAXPP_EntityInfo **initial_entity) { FAXPP_EntityValue *entv; @@ -1205,7 +1237,7 @@ entv 
= ent->value; while(entv) { if(entv->entity_ref) { - err = p_parse_internal_entity(env, entv->entity_ref, state, initial_entity); + err = p_parse_entity_impl(env, entv->entity_ref, state, initial_entity); if(err) return err; } else { @@ -1256,6 +1288,40 @@ return err; } +static FAXPP_Error p_parse_entity_impl(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state, FAXPP_EntityInfo **initial_entity) +{ + FAXPP_Error err; + + if(ent->external) { + switch(state) { + case ELEMENT_CONTENT_ENTITY: state = EXTERNAL_PARSED_ENTITY; break; + case INTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + case EXTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + case IN_MARKUP_ENTITY: state = EXTERNAL_IN_MARKUP_ENTITY; break; + default: break; + } + + err = p_parse_external_entity(env, ent, state); + if(err) return err; + + // Set the entity on the first new tokenizer + if(*initial_entity) { + env->tenv->start_of_entity = 1; + env->tenv->entity = *initial_entity; + *initial_entity = 0; + } + else { + env->tenv->start_of_entity = 0; + } + } + else { + err = p_parse_internal_entity(env, ent, state, initial_entity); + if(err) return err; + } + + return NO_ERROR; +} + static const char single_space[] = {' '}; static FAXPP_Error p_parse_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state) @@ -1264,6 +1330,7 @@ FAXPP_EntityInfo *tmp; // Check for a recursive entity + // TBD Need a better method for doing this - jpcs FAXPP_TokenizerEnv *tokenizer = env->tenv; while(tokenizer) { if(tokenizer->entity == ent) @@ -1517,6 +1584,10 @@ err = UNDEFINED_ENTITY; goto error; } + if(env->standalone && !ent->from_internal_subset) { + err = UNDEFINED_ENTITY; + goto error; + } // [WFC: Parsed Entity] if(ent->unparsed) { err = REFERENCE_TO_UNPARSED_ENTITY; @@ -1952,6 +2023,10 @@ err = UNDEFINED_ENTITY; goto error; } + if(env->standalone && !ent->from_internal_subset) { + err = UNDEFINED_ENTITY; + goto error; + } // [WFC: Parsed Entity] 
if(ent->unparsed) { err = REFERENCE_TO_UNPARSED_ENTITY; Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-03-17 10:41:03 UTC (rev 45) +++ trunk/faxpp/src/xml_parser.h 2008-03-18 13:35:36 UTC (rev 46) @@ -68,6 +68,7 @@ unsigned int external:1; unsigned int unparsed:1; + unsigned int from_internal_subset:1; unsigned int line; unsigned int column; @@ -116,6 +117,14 @@ unsigned int current_attlist:1; unsigned int current_notation:1; + unsigned int standalone:1; + + enum { + XML_VERSION_NOT_KNOWN = 0, + XML_VERSION_1_0, + XML_VERSION_1_1 + } xml_version; + FAXPP_AttrValue *av_ptr; FAXPP_AttrValue *av_dealloc; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-17 10:41:08
|
Revision: 45 http://faxpp.svn.sourceforge.net/faxpp/?rev=45&view=rev Author: jpcs Date: 2008-03-17 03:41:03 -0700 (Mon, 17 Mar 2008) Log Message: ----------- Fully parse parameter entity references in element and attlist declarations. Fixed a number of miscellaneous bugs. Modified Paths: -------------- trunk/faxpp/include/faxpp/error.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/pi.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/include/faxpp/error.h 2008-03-17 10:41:03 UTC (rev 45) @@ -70,6 +70,7 @@ INVALID_ELEMENTDECL_CONTENT, INVALID_CONDITIONAL_SECTION, IMPROPER_NESTING_OF_ENTITY, + PARAMETER_ENTITY_IN_INTERNAL_SUBSET, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/attlistdecl.c 2008-03-17 10:41:03 UTC (rev 45) @@ -45,9 +45,62 @@ SINGLE_CHAR_STATE(attlistdecl_initial_state3, 'L', 0, attlistdecl_initial_state4, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state4, 'I', 0, attlistdecl_initial_state5, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state5, 'S', 0, attlistdecl_initial_state6, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(attlistdecl_initial_state6, 'T', attlistdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(attlistdecl_initial_state6, 'T', 0, attlistdecl_name_ws_state1, INVALID_DTD_DECL) FAXPP_Error +attlistdecl_name_ws_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = 
attlistdecl_name_ws_state2; + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_name_state1; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_name_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error attlistdecl_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -73,10 +126,11 @@ switch(env->current_char) { WHITESPACE: - env->state = attlistdecl_attdef_name_state1; + case '%': + env->state = attlistdecl_attdef_name_ws_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case '>': env->state = attlistdecl_attdef_name_state1; @@ -130,10 +184,11 @@ switch(env->current_char) { WHITESPACE: - env->state = attlistdecl_attdef_name_state1; + case '%': + env->state = attlistdecl_attdef_name_ws_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case '>': env->state = attlistdecl_attdef_name_state1; @@ -155,18 +210,74 @@ } FAXPP_Error -attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env) 
+attlistdecl_attdef_name_ws_state1(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { WHITESPACE: + env->state = attlistdecl_attdef_name_ws_state2; break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; case '>': base_state(env); report_empty_token(ATTLISTDECL_END_TOKEN, env); break; default: + env->state = attlistdecl_attdef_name_state1; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_attdef_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '>': + base_state(env); + report_empty_token(ATTLISTDECL_END_TOKEN, env); + break; + LINE_ENDINGS + default: env->state = attlistdecl_attdef_name_state2; token_start_position(env); next_char(env); @@ -187,11 +298,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_atttype_state; - env->state = ws_state; + case '%': + env->state = attlistdecl_atttype_ws_state1; token_end_position(env); report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case ':': env->state 
= attlistdecl_attdef_name_seen_colon_state1; @@ -239,11 +350,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_atttype_state; - env->state = ws_state; + case '%': + env->state = attlistdecl_atttype_ws_state1; token_end_position(env); report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; default: break; @@ -258,6 +369,59 @@ return NO_ERROR; } +FAXPP_Error +attlistdecl_atttype_ws_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = attlistdecl_atttype_ws_state2; + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_state; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + /* [54] AttType ::= StringType | TokenizedType | EnumeratedType */ /* [55] StringType ::= 'CDATA' */ /* [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] */ @@ -294,8 +458,7 @@ env->state = attlistdecl_atttype_nmtoken_state1; break; case '(': - env->stored_state = 
attlistdecl_atttype_enumeration_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_enumeration_name_ws_state; break; LINE_ENDINGS default: @@ -319,8 +482,7 @@ switch(env->current_char) { case 'Y': - env->stored_state = attlistdecl_default_state1; - env->state = ws_state; + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, env); break; case 'I': @@ -337,7 +499,7 @@ } SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', 0, attlistdecl_default_ws_state1, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN) FAXPP_Error attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) @@ -374,13 +536,14 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_default_state1; - env->state = ws_state; + case '%': + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, env); + // No next_char + return NO_ERROR; break; case 'S': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, env); break; default: @@ -397,17 +560,68 @@ SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state4, 'I', 0, attlistdecl_atttype_notation_state5, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state5, 'O', 0, attlistdecl_atttype_notation_state6, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', attlistdecl_atttype_notation_state7, ws_plus_state, 
INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', 0, attlistdecl_atttype_notation_ws_state1, INVALID_ATTRIBUTE_TYPE) FAXPP_Error -attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env) +attlistdecl_atttype_notation_ws_state1(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { + WHITESPACE: + env->state = attlistdecl_atttype_notation_ws_state2; + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_notation_lpar_state; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_notation_lpar_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} +FAXPP_Error +attlistdecl_atttype_notation_lpar_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { case '(': - env->stored_state = attlistdecl_atttype_notation_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_notation_name_ws_state; break; default: next_char(env); @@ -419,6 +633,32 @@ } FAXPP_Error +attlistdecl_atttype_notation_name_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + 
WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_notation_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -443,20 +683,19 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_atttype_notation_separator_state; - env->state = ws_state; + case '%': + env->state = attlistdecl_atttype_notation_separator_ws_state; token_end_position(env); report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); - break; + // No next_char + return NO_ERROR; case '|': - env->stored_state = attlistdecl_atttype_notation_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_notation_name_ws_state; token_end_position(env); report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); break; case ')': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; token_end_position(env); report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); break; @@ -472,18 +711,42 @@ } FAXPP_Error +attlistdecl_atttype_notation_separator_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_notation_separator_state; + token_start_position(env); + // 
No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { case '|': - env->stored_state = attlistdecl_atttype_notation_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_notation_name_ws_state; break; case ')': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; break; default: next_char(env); @@ -503,10 +766,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_default_state1; - env->state = ws_state; + case '%': + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_ID_TOKEN, env); - break; + // No next_char + return NO_ERROR; case 'R': env->state = attlistdecl_atttype_idref_state1; break; @@ -529,13 +793,13 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_default_state1; - env->state = ws_state; + case '%': + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_IDREF_TOKEN, env); - break; + // No next_char + return NO_ERROR; case 'S': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; report_empty_token(ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, env); break; default: @@ -550,9 +814,35 @@ SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', 0, 
attlistdecl_default_ws_state1, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN) FAXPP_Error +attlistdecl_atttype_enumeration_name_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_enumeration_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -562,7 +852,7 @@ default: env->state = attlistdecl_atttype_enumeration_name_state2; next_char(env); - if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + if(env->current_char != ':' && (FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) return INVALID_ATTRIBUTE_TYPE; break; } @@ -577,23 +867,24 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_atttype_enumeration_separator_state; - env->state = ws_state; + case '%': + env->state = attlistdecl_atttype_enumeration_separator_ws_state; token_end_position(env); report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); - break; + // No next_char + return NO_ERROR; case '|': - env->stored_state = attlistdecl_atttype_enumeration_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_enumeration_name_ws_state; token_end_position(env); report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); break; case ')': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; token_end_position(env); report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); break; + case ':': + break; default: 
next_char(env); if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) @@ -606,18 +897,42 @@ } FAXPP_Error +attlistdecl_atttype_enumeration_separator_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_atttype_enumeration_separator_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { case '|': - env->stored_state = attlistdecl_atttype_enumeration_name_state1; - env->state = ws_state; + env->state = attlistdecl_atttype_enumeration_name_ws_state; break; case ')': - env->stored_state = attlistdecl_default_state1; - env->state = ws_plus_state; + env->state = attlistdecl_default_ws_state1; break; default: next_char(env); @@ -628,6 +943,59 @@ return NO_ERROR; } +FAXPP_Error +attlistdecl_default_ws_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = attlistdecl_default_ws_state2; + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_default_state1; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_default_ws_state2(FAXPP_TokenizerEnv *env) +{ + 
read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = attlistdecl_default_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' */ /* | (('#FIXED' S)? AttValue) [VC: Required Attribute] */ /* [VC: Attribute Default Value Syntactically Correct] */ @@ -690,7 +1058,7 @@ SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_ws_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL) @@ -698,7 +1066,7 @@ SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, 
INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_ws_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL) @@ -748,7 +1116,7 @@ switch(env->current_char) { case '\'': - env->state = attlistdecl_attdef_name_state1; + env->state = attlistdecl_attdef_name_ws_state1; token_end_position(env); report_token(ATTRIBUTE_VALUE_TOKEN, env); next_char(env); @@ -807,7 +1175,7 @@ switch(env->current_char) { case '"': - env->state = attlistdecl_attdef_name_state1; + env->state = attlistdecl_attdef_name_ws_state1; token_end_position(env); report_token(ATTRIBUTE_VALUE_TOKEN, env); next_char(env); Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/elementdecl.c 2008-03-17 10:41:03 UTC (rev 45) @@ -67,29 +67,57 @@ SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state1, INVALID_DTD_DECL) FAXPP_Error -elementdecl_name_ws_state(FAXPP_TokenizerEnv *env) +elementdecl_name_ws_state1(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { WHITESPACE: + 
env->state = elementdecl_name_ws_state2; next_char(env); break; case '%': - // TBD only for external subset - jpcs store_state(env); env->state = parameter_entity_reference_in_markup_state; next_char(env); token_start_position(env); - return NO_ERROR; + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; default: env->state = elementdecl_name_state1; token_start_position(env); // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +elementdecl_name_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_name_state1; + token_start_position(env); + // No next_char + break; } return NO_ERROR; } @@ -120,11 +148,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_content_state; - env->state = ws_state; + case '%': + env->state = elementdecl_content_ws_state1; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case ':': env->state = elementdecl_name_seen_colon_state1; @@ -172,11 +200,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_content_state; - env->state = ws_state; + case '%': + env->state = elementdecl_content_ws_state1; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; default: break; @@ -192,6 +220,59 @@ } FAXPP_Error +elementdecl_content_ws_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + 
env->state = elementdecl_content_ws_state2; + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_content_state; + token_start_position(env); + // No next_char + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +elementdecl_content_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_content_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_content_state(FAXPP_TokenizerEnv *env) { read_char(env); @@ -205,8 +286,7 @@ break; case '(': env->elemdecl_content_level += 1; - env->stored_state = elementdecl_mixed_or_children_state; - env->state = ws_state; + env->state = elementdecl_mixed_or_children_ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); break; LINE_ENDINGS @@ -221,12 +301,38 @@ SINGLE_CHAR_STATE(elementdecl_empty_state1, 'M', 0, elementdecl_empty_state2, INVALID_ELEMENTDECL_CONTENT) SINGLE_CHAR_STATE(elementdecl_empty_state2, 'P', 0, elementdecl_empty_state3, INVALID_ELEMENTDECL_CONTENT) SINGLE_CHAR_STATE(elementdecl_empty_state3, 'T', 0, elementdecl_empty_state4, INVALID_ELEMENTDECL_CONTENT) -SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN) 
+SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', 0, elementdecl_end_ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN) SINGLE_CHAR_STATE(elementdecl_any_state1, 'N', 0, elementdecl_any_state2, INVALID_ELEMENTDECL_CONTENT) -SINGLE_CHAR_STATE_RETURN(elementdecl_any_state2, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_ANY_TOKEN) +SINGLE_CHAR_STATE_RETURN(elementdecl_any_state2, 'Y', 0, elementdecl_end_ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_ANY_TOKEN) FAXPP_Error +elementdecl_mixed_or_children_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_mixed_or_children_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env) { read_char(env); @@ -246,6 +352,32 @@ } FAXPP_Error +elementdecl_cp_name_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_cp_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -253,8 +385,7 @@ switch(env->current_char) { case '(': 
env->elemdecl_content_level += 1; - env->stored_state = elementdecl_cp_name_state1; - env->state = ws_state; + env->state = elementdecl_cp_name_ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); next_char(env); break; @@ -277,11 +408,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_cp_separator_or_end_state; - env->state = ws_state; + case '%': + env->state = elementdecl_cp_separator_or_end_ws_state; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case '?': case '*': @@ -345,11 +476,11 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_cp_separator_or_end_state; - env->state = ws_state; + case '%': + env->state = elementdecl_cp_separator_or_end_ws_state; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case '?': case '*': @@ -385,11 +516,12 @@ { read_char(env); - if(env->elemdecl_content_level == 0) - env->stored_state = elementdecl_end_state; - else - env->stored_state = elementdecl_cp_separator_or_end_state; - env->state = ws_state; + if(env->elemdecl_content_level == 0) { + env->state = elementdecl_end_ws_state; + } + else { + env->state = elementdecl_cp_separator_or_end_ws_state; + } switch(env->current_char) { case '?': @@ -411,19 +543,43 @@ } FAXPP_Error +elementdecl_cp_separator_or_end_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_cp_separator_or_end_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + 
+FAXPP_Error elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { case '|': - env->stored_state = elementdecl_cp_name_state1; - env->state = ws_state; + env->state = elementdecl_cp_name_ws_state; report_empty_token(ELEMENTDECL_BAR_TOKEN, env); break; case ',': - env->stored_state = elementdecl_cp_name_state1; - env->state = ws_state; + env->state = elementdecl_cp_name_ws_state; report_empty_token(ELEMENTDECL_COMMA_TOKEN, env); break; case ')': @@ -445,9 +601,35 @@ SINGLE_CHAR_STATE(elementdecl_pcdata_state3, 'D', 0, elementdecl_pcdata_state4, INVALID_ELEMENTDECL_CONTENT) SINGLE_CHAR_STATE(elementdecl_pcdata_state4, 'A', 0, elementdecl_pcdata_state5, INVALID_ELEMENTDECL_CONTENT) SINGLE_CHAR_STATE(elementdecl_pcdata_state5, 'T', 0, elementdecl_pcdata_state6, INVALID_ELEMENTDECL_CONTENT) -SINGLE_CHAR_STATE_RETURN(elementdecl_pcdata_state6, 'A', elementdecl_pcdata_end_or_names_state1, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_PCDATA_TOKEN) +SINGLE_CHAR_STATE_RETURN(elementdecl_pcdata_state6, 'A', 0, elementdecl_pcdata_end_or_names_ws_state1, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_PCDATA_TOKEN) FAXPP_Error +elementdecl_pcdata_end_or_names_ws_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_pcdata_end_or_names_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -459,8 +641,7 @@ report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; case '|': - env->stored_state = 
elementdecl_pcdata_name_state1; - env->state = ws_state; + env->state = elementdecl_pcdata_name_ws_state; report_empty_token(ELEMENTDECL_BAR_TOKEN, env); break; default: @@ -482,8 +663,7 @@ next_char(env); // Fall through default: - env->stored_state = elementdecl_end_state; - env->state = ws_state; + env->state = elementdecl_end_ws_state; // No next_char break; } @@ -491,6 +671,32 @@ } FAXPP_Error +elementdecl_pcdata_end_or_names_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_pcdata_end_or_names_state2; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env) { read_char(env); @@ -502,8 +708,7 @@ report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; case '|': - env->stored_state = elementdecl_pcdata_name_state1; - env->state = ws_state; + env->state = elementdecl_pcdata_name_ws_state; report_empty_token(ELEMENTDECL_BAR_TOKEN, env); break; default: @@ -519,8 +724,7 @@ { read_char(env); - env->stored_state = elementdecl_end_state; - env->state = ws_state; + env->state = elementdecl_end_ws_state; switch(env->current_char) { case '*': @@ -535,6 +739,32 @@ } FAXPP_Error +elementdecl_pcdata_name_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; 
+ return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_pcdata_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -560,13 +790,14 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_pcdata_end_or_names_state2; - env->state = ws_state; + case '%': + env->state = elementdecl_pcdata_end_or_names_ws_state2; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case ')': + case '|': env->state = elementdecl_pcdata_end_or_names_state2; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); @@ -618,13 +849,14 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = elementdecl_pcdata_end_or_names_state2; - env->state = ws_state; + case '%': + env->state = elementdecl_pcdata_end_or_names_ws_state2; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); - next_char(env); + // No next_char return NO_ERROR; case ')': + case '|': env->state = elementdecl_pcdata_end_or_names_state2; token_end_position(env); report_token(ELEMENTDECL_NAME_TOKEN, env); @@ -644,6 +876,32 @@ } FAXPP_Error +elementdecl_end_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity) + return NO_ERROR; + return PARAMETER_ENTITY_IN_INTERNAL_SUBSET; + default: + env->state = elementdecl_end_state; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_end_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/error.c 
=================================================================== --- trunk/faxpp/src/error.c 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/error.c 2008-03-17 10:41:03 UTC (rev 45) @@ -135,6 +135,8 @@ return "INVALID_CONDITIONAL_SECTION"; case IMPROPER_NESTING_OF_ENTITY: return "IMPROPER_NESTING_OF_ENTITY"; + case PARAMETER_ENTITY_IN_INTERNAL_SUBSET: + return "PARAMETER_ENTITY_IN_INTERNAL_SUBSET"; case NO_ERROR: break; } Modified: trunk/faxpp/src/pi.c =================================================================== --- trunk/faxpp/src/pi.c 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/pi.c 2008-03-17 10:41:03 UTC (rev 45) @@ -138,9 +138,7 @@ default: env->state = pi_content_state; token_start_position(env); - next_char(env); - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) - return RESTRICTED_CHAR; + // No next_char break; } return NO_ERROR; Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-17 10:41:03 UTC (rev 45) @@ -500,8 +500,10 @@ return "elementdecl_initial_state4"; else if(state == elementdecl_initial_state5) return "elementdecl_initial_state5"; - else if(state == elementdecl_name_ws_state) - return "elementdecl_name_ws_state"; + else if(state == elementdecl_name_ws_state1) + return "elementdecl_name_ws_state1"; + else if(state == elementdecl_name_ws_state2) + return "elementdecl_name_ws_state2"; else if(state == elementdecl_name_state1) return "elementdecl_name_state1"; else if(state == elementdecl_name_state2) @@ -510,6 +512,10 @@ return "elementdecl_name_seen_colon_state1"; else if(state == elementdecl_name_seen_colon_state2) return "elementdecl_name_seen_colon_state2"; + else if(state == elementdecl_content_ws_state1) + return "elementdecl_content_ws_state1"; + else if(state == elementdecl_content_ws_state2) + return 
"elementdecl_content_ws_state2"; else if(state == elementdecl_content_state) return "elementdecl_content_state"; else if(state == elementdecl_empty_state1) @@ -524,8 +530,12 @@ return "elementdecl_any_state1"; else if(state == elementdecl_any_state2) return "elementdecl_any_state2"; + else if(state == elementdecl_mixed_or_children_ws_state) + return "elementdecl_mixed_or_children_ws_state"; else if(state == elementdecl_mixed_or_children_state) return "elementdecl_mixed_or_children_state"; + else if(state == elementdecl_cp_name_ws_state) + return "elementdecl_cp_name_ws_state"; else if(state == elementdecl_cp_name_state1) return "elementdecl_cp_name_state1"; else if(state == elementdecl_cp_name_state2) @@ -536,6 +546,8 @@ return "elementdecl_cp_name_seen_colon_state2"; else if(state == elementdecl_cp_cardinality_state) return "elementdecl_cp_cardinality_state"; + else if(state == elementdecl_cp_separator_or_end_ws_state) + return "elementdecl_cp_separator_or_end_ws_state"; else if(state == elementdecl_cp_separator_or_end_state) return "elementdecl_cp_separator_or_end_state"; else if(state == elementdecl_pcdata_state1) @@ -550,14 +562,20 @@ return "elementdecl_pcdata_state5"; else if(state == elementdecl_pcdata_state6) return "elementdecl_pcdata_state6"; + else if(state == elementdecl_pcdata_end_or_names_ws_state1) + return "elementdecl_pcdata_end_or_names_ws_state1"; else if(state == elementdecl_pcdata_end_or_names_state1) return "elementdecl_pcdata_end_or_names_state1"; else if(state == elementdecl_pcdata_optional_star_state) return "elementdecl_pcdata_optional_star_state"; + else if(state == elementdecl_pcdata_end_or_names_ws_state2) + return "elementdecl_pcdata_end_or_names_ws_state2"; else if(state == elementdecl_pcdata_end_or_names_state2) return "elementdecl_pcdata_end_or_names_state2"; else if(state == elementdecl_pcdata_star_state) return "elementdecl_pcdata_star_state"; + else if(state == elementdecl_pcdata_name_ws_state) + return 
"elementdecl_pcdata_name_ws_state"; else if(state == elementdecl_pcdata_name_state1) return "elementdecl_pcdata_name_state1"; else if(state == elementdecl_pcdata_name_state2) @@ -566,6 +584,8 @@ return "elementdecl_pcdata_name_seen_colon_state1"; else if(state == elementdecl_pcdata_name_seen_colon_state2) return "elementdecl_pcdata_name_seen_colon_state2"; + else if(state == elementdecl_end_ws_state) + return "elementdecl_end_ws_state"; else if(state == elementdecl_end_state) return "elementdecl_end_state"; @@ -583,6 +603,10 @@ return "attlistdecl_initial_state5"; else if(state == attlistdecl_initial_state6) return "attlistdecl_initial_state6"; + else if(state == attlistdecl_name_ws_state1) + return "attlistdecl_name_ws_state1"; + else if(state == attlistdecl_name_ws_state2) + return "attlistdecl_name_ws_state2"; else if(state == attlistdecl_name_state1) return "attlistdecl_name_state1"; else if(state == attlistdecl_name_state2) @@ -591,6 +615,10 @@ return "attlistdecl_name_seen_colon_state1"; else if(state == attlistdecl_name_seen_colon_state2) return "attlistdecl_name_seen_colon_state2"; + else if(state == attlistdecl_attdef_name_ws_state1) + return "attlistdecl_attdef_name_ws_state1"; + else if(state == attlistdecl_attdef_name_ws_state2) + return "attlistdecl_attdef_name_ws_state2"; else if(state == attlistdecl_attdef_name_state1) return "attlistdecl_attdef_name_state1"; else if(state == attlistdecl_attdef_name_state2) @@ -599,6 +627,11 @@ return "attlistdecl_attdef_name_seen_colon_state1"; else if(state == attlistdecl_attdef_name_seen_colon_state2) return "attlistdecl_attdef_name_seen_colon_state2"; + + else if(state == attlistdecl_atttype_ws_state1) + return "attlistdecl_atttype_ws_state1"; + else if(state == attlistdecl_atttype_ws_state2) + return "attlistdecl_atttype_ws_state2"; else if(state == attlistdecl_atttype_state) return "attlistdecl_atttype_state"; else if(state == attlistdecl_atttype_cdata_state1) @@ -659,21 +692,37 @@ return 
"attlistdecl_atttype_notation_state5"; else if(state == attlistdecl_atttype_notation_state6) return "attlistdecl_atttype_notation_state6"; - else if(state == attlistdecl_atttype_notation_state7) - return "attlistdecl_atttype_notation_state7"; + else if(state == attlistdecl_atttype_notation_ws_state1) + return "attlistdecl_atttype_notation_ws_state1"; + else if(state == attlistdecl_atttype_notation_ws_state2) + return "attlistdecl_atttype_notation_ws_state2"; + else if(state == attlistdecl_atttype_notation_lpar_state) + return "attlistdecl_atttype_notation_lpar_state"; + else if(state == attlistdecl_atttype_notation_name_ws_state) + return "attlistdecl_atttype_notation_name_ws_state"; else if(state == attlistdecl_atttype_notation_name_state1) return "attlistdecl_atttype_notation_name_state1"; else if(state == attlistdecl_atttype_notation_name_state2) return "attlistdecl_atttype_notation_name_state2"; + else if(state == attlistdecl_atttype_notation_separator_ws_state) + return "attlistdecl_atttype_notation_separator_ws_state"; else if(state == attlistdecl_atttype_notation_separator_state) return "attlistdecl_atttype_notation_separator_state"; + else if(state == attlistdecl_atttype_enumeration_name_ws_state) + return "attlistdecl_atttype_enumeration_name_ws_state"; else if(state == attlistdecl_atttype_enumeration_name_state1) return "attlistdecl_atttype_enumeration_name_state1"; else if(state == attlistdecl_atttype_enumeration_name_state2) return "attlistdecl_atttype_enumeration_name_state2"; + else if(state == attlistdecl_atttype_enumeration_separator_ws_state) + return "attlistdecl_atttype_enumeration_separator_ws_state"; else if(state == attlistdecl_atttype_enumeration_separator_state) return "attlistdecl_atttype_enumeration_separator_state"; + else if(state == attlistdecl_default_ws_state1) + return "attlistdecl_default_ws_state1"; + else if(state == attlistdecl_default_ws_state2) + return "attlistdecl_default_ws_state2"; else if(state == 
attlistdecl_default_state1) return "attlistdecl_default_state1"; else if(state == attlistdecl_default_state2) Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-15 10:59:42 UTC (rev 44) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-17 10:41:03 UTC (rev 45) @@ -300,11 +300,14 @@ FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error elementdecl_name_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_name_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_name_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_content_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_content_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_content_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_empty_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_empty_state2(FAXPP_TokenizerEnv *env); @@ -312,12 +315,15 @@ FAXPP_Error elementdecl_empty_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_any_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_any_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_mixed_or_children_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_name_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_cp_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error 
elementdecl_cp_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_cp_name_seen_colon_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_cp_cardinality_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_separator_or_end_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_state2(FAXPP_TokenizerEnv *env); @@ -325,14 +331,18 @@ FAXPP_Error elementdecl_pcdata_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_state5(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_end_or_names_ws_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_optional_star_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_end_or_names_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_star_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_name_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_pcdata_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_end_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_end_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_initial_state1(FAXPP_TokenizerEnv *env); @@ -341,15 +351,21 @@ FAXPP_Error attlistdecl_initial_state4(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_initial_state5(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_initial_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error 
attlistdecl_name_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_name_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_cdata_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_cdata_state2(FAXPP_TokenizerEnv *env); @@ -380,14 +396,22 @@ FAXPP_Error attlistdecl_atttype_notation_state4(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_notation_state5(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_notation_state6(FAXPP_TokenizerEnv *env); -FAXPP_Error attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_lpar_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_name_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error 
attlistdecl_atttype_notation_separator_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_name_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_separator_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_ws_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_ws_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_default_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_default_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_default_implied_state1(FAXPP_TokenizerEnv *env); @@ -492,9 +516,9 @@ return BAD_ENCODING; \ } \ \ -/* printf("%03d:%03d Tok:%p L:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \ -/* (env), (env)->nesting_level, FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \ -/* (env)->current_char); */ \ +/* printf("%03d:%03d Tok:%p L:%03d EL:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \ +/* (env), (env)->nesting_level, (env)->elemdecl_content_level, FAXPP_state_to_string((env)->state), */ \ +/* *(unsigned char*)(env)->position, (env)->current_char); */ \ } #define read_char(env) \ @@ -563,7 +587,7 @@ (env)->state = final_state; \ else if((env)->internal_subset) \ (env)->state = internal_subset_state; \ - else if((env)->in_markup_entity) \ + else if((env)->in_markup_entity || (env)->external_in_markup_entity) \ (env)->state = (env)->prev->state; \ else (env)->state = initial_misc_state; \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-15 
10:59:42 UTC (rev 44) +++ trunk/faxpp/src/xml_parser.c 2008-03-17 10:41:03 UTC (rev 45) @@ -430,7 +430,7 @@ static void p_change_buffer(FAXPP_Buffer *buffer, void *newBuffer, void **text) { - if(*text >= buffer->buffer && *text < (buffer->buffer + buffer->length)) { + if(*text >= buffer->buffer && *text <= (buffer->buffer + buffer->length)) { *text += newBuffer - buffer->buffer; } } @@ -514,6 +514,11 @@ p_change_buffer(buffer, newBuffer, &tokenizer->buffer_end); p_change_buffer(buffer, newBuffer, &tokenizer->position); + p_change_buffer(buffer, newBuffer, &tokenizer->result_token.value.ptr); + p_change_buffer(buffer, newBuffer, &tokenizer->token.value.ptr); + p_change_buffer(buffer, newBuffer, &tokenizer->token_position1); + p_change_buffer(buffer, newBuffer, &tokenizer->token_position2); + tokenizer = tokenizer->prev; } } @@ -585,37 +590,6 @@ el = el->prev; } - // Copy any strings in the entity lists which point to the old buffer - // into the entity buffer - FAXPP_EntityInfo *ent; - FAXPP_EntityValue *entv; - - ent = env->general_entities; - while(ent) { - p_move_text_to_buffer(env, &ent->name, &env->entity_buffer); - - entv = ent->value; - while(entv) { - p_move_text_to_buffer(env, &entv->value, &env->entity_buffer); - entv = entv->prev; - } - - ent = ent->next; - } - - ent = env->parameter_entities; - while(ent) { - p_move_text_to_buffer(env, &ent->name, &env->entity_buffer); - - entv = ent->value; - while(entv) { - p_move_text_to_buffer(env, &entv->value, &env->entity_buffer); - entv = entv->prev; - } - - ent = ent->next; - } - return NO_ERROR; } @@ -654,10 +628,12 @@ /* strncpy(buf, env->tenv->result_token.value.ptr, env->tenv->result_token.value.len); */ /* buf[env->tenv->result_token.value.len] = 0; */ /* } */ -/* printf("%03d:%03d Token ID: %s, Token: \"%s\"\n", env->tenv->result_token.line, env->tenv->result_token.column, FAXPP_token_to_string(env->tenv->result_token.type), buf); */ +/* printf("%03d:%03d Token ID: %s, Token: \"%s\"\n", 
env->tenv->result_token.line, */ +/* env->tenv->result_token.column, FAXPP_token_to_string(env->tenv->result_token.type), buf); */ /* } */ /* else { */ -/* printf("%03d:%03d Token ID: %s\n", env->tenv->result_token.line, env->tenv->result_token.column, FAXPP_token_to_string(env->tenv->result_token.type)); */ +/* printf("%03d:%03d Token ID: %s\n", env->tenv->result_token.line, env->tenv->result_token.column, */ +/* FAXPP_token_to_string(env->tenv->result_token.type)); */ /* } */ /* } */ @@ -729,9 +705,18 @@ return NO_ERROR; } +#define p_force_copy_text_from_token(text, env, buf) \ +{ \ + (text)->len = (env)->tenv->result_token.value.len; \ + (text)->ptr = (buf)->cursor; \ + FAXPP_Error err = FAXPP_buffer_append((buf), (env)->tenv->result_token.value.ptr, (env)->tenv->result_token.value.len); \ + if((env)->tenv->null_terminate && err == 0) \ + err = FAXPP_buffer_append_ch((buf), (env)->tenv->transcoder.encode, 0); \ + if(err != 0) return err; \ +} + #define p_copy_text_from_token_with_buffer(text,... [truncated message content] |
From: <jp...@us...> - 2008-03-15 10:59:43
|
Revision: 44 http://faxpp.svn.sourceforge.net/faxpp/?rev=44&view=rev Author: jpcs Date: 2008-03-15 03:59:42 -0700 (Sat, 15 Mar 2008) Log Message: ----------- Started work on parsing and expanding parameter entity references in DTD markup. Modified Paths: -------------- trunk/faxpp/examples/parser_example.c trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/parser.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/conditional.c trunk/faxpp/src/doctype.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/reference.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c Modified: trunk/faxpp/examples/parser_example.c =================================================================== --- trunk/faxpp/examples/parser_example.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/examples/parser_example.c 2008-03-15 10:59:42 UTC (rev 44) @@ -76,7 +76,7 @@ } while((err = FAXPP_next_event(parser)) == 0) { -/* output_event(FAXPP_get_current_event(parser), stdout); */ + output_event(FAXPP_get_current_event(parser), stdout); if(FAXPP_get_current_event(parser)->type == END_DOCUMENT_EVENT) break; Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/include/faxpp/error.h 2008-03-15 10:59:42 UTC (rev 44) @@ -69,6 +69,7 @@ INVALID_DEFAULTDECL, INVALID_ELEMENTDECL_CONTENT, INVALID_CONDITIONAL_SECTION, + IMPROPER_NESTING_OF_ENTITY, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/parser.h =================================================================== --- trunk/faxpp/include/faxpp/parser.h 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/include/faxpp/parser.h 2008-03-15 10:59:42 UTC (rev 44) @@ -92,8 +92,9 @@ /// The type of 
external entity to parse typedef enum { - EXTERNAL_PARSED_ENTITY = 0, ///< An external parsed entity - EXTERNAL_SUBSET_ENTITY = 1 ///< An external subset (DTD) + EXTERNAL_PARSED_ENTITY = 0, ///< An external parsed entity + EXTERNAL_SUBSET_ENTITY = 1, ///< An external subset (DTD) + EXTERNAL_IN_MARKUP_ENTITY = 2 ///< An external entity inside DTD markup } FAXPP_EntityType; /** Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/include/faxpp/token.h 2008-03-15 10:59:42 UTC (rev 44) @@ -64,6 +64,7 @@ DEC_CHAR_REFERENCE_TOKEN, HEX_CHAR_REFERENCE_TOKEN, PE_REFERENCE_TOKEN, + PE_REFERENCE_IN_MARKUP_TOKEN, DOCTYPE_PREFIX_TOKEN, DOCTYPE_NAME_TOKEN, Modified: trunk/faxpp/src/conditional.c =================================================================== --- trunk/faxpp/src/conditional.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/conditional.c 2008-03-15 10:59:42 UTC (rev 44) @@ -37,6 +37,29 @@ return NO_ERROR; \ } +FAXPP_Error +conditional_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + env->state = conditional_state1; + // No next_char + break; + } + return NO_ERROR; +} + SINGLE_CHAR_STATE(conditional_state1, 'I', 0, conditional_state2, INVALID_CONDITIONAL_SECTION) FAXPP_Error @@ -47,11 +70,9 @@ switch(env->current_char) { case 'N': env->state = include_state1; - env->nesting_level += 1; break; case 'G': env->state = ignore_state1; - env->nesting_level += 1; break; LINE_ENDINGS default: @@ -66,9 +87,28 @@ SINGLE_CHAR_STATE(ignore_state2, 'O', 0, ignore_state3, INVALID_CONDITIONAL_SECTION) SINGLE_CHAR_STATE(ignore_state3, 'R', 0, ignore_state4, 
INVALID_CONDITIONAL_SECTION) SINGLE_CHAR_STATE(ignore_state4, 'E', ignore_state5, ws_state, INVALID_CONDITIONAL_SECTION) -SINGLE_CHAR_STATE(ignore_state5, '[', 0, ignore_content_state, INVALID_CONDITIONAL_SECTION) FAXPP_Error +ignore_state5(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '[': + env->ignore_start_level = env->nesting_level; + env->nesting_level += 1; + env->state = ignore_content_state; + next_char(env); + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_CONDITIONAL_SECTION; + } + return NO_ERROR; +} + +FAXPP_Error ignore_content_state(FAXPP_TokenizerEnv *env) { read_char(env); @@ -169,7 +209,8 @@ break; case '>': env->nesting_level -= 1; - if(env->nesting_level == 0) { + if(env->nesting_level == env->ignore_start_level) { + env->nesting_level -= 1; base_state(env); } else { @@ -194,4 +235,23 @@ SINGLE_CHAR_STATE(include_state3, 'U', 0, include_state4, INVALID_CONDITIONAL_SECTION) SINGLE_CHAR_STATE(include_state4, 'D', 0, include_state5, INVALID_CONDITIONAL_SECTION) SINGLE_CHAR_STATE(include_state5, 'E', include_state6, ws_state, INVALID_CONDITIONAL_SECTION) -SINGLE_CHAR_STATE(include_state6, '[', 0, external_subset_state, INVALID_CONDITIONAL_SECTION) + +FAXPP_Error +include_state6(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '[': + env->nesting_level += 1; + env->state = external_subset_state; + next_char(env); + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_CONDITIONAL_SECTION; + } + return NO_ERROR; +} + Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/doctype.c 2008-03-15 10:59:42 UTC (rev 44) @@ -726,7 +726,7 @@ next_char(env); return INVALID_DTD_DECL; case '>': - env->nesting_level -= 1; + env->nesting_level -= 2; base_state(env); break; default: @@ -772,8 +772,8 @@ env->state = 
comment_start_state2; break; case '[': - env->stored_state = conditional_state1; - env->state = ws_state; + env->nesting_level += 1; + env->state = conditional_ws_state; break; case 'E': env->state = elementdecl_or_entitydecl_state; Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/elementdecl.c 2008-03-15 10:59:42 UTC (rev 44) @@ -24,6 +24,7 @@ switch(env->current_char) { case 'L': + env->nesting_level += 1; env->state = elementdecl_initial_state1; next_char(env); break; @@ -66,9 +67,34 @@ SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', 0, elementdecl_name_ws_state, INVALID_DTD_DECL) FAXPP_Error +elementdecl_name_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '%': + // TBD only for external subset - jpcs + store_state(env); + env->state = parameter_entity_reference_in_markup_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + env->state = elementdecl_name_state1; + token_start_position(env); + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); @@ -624,6 +650,7 @@ switch(env->current_char) { case '>': + env->nesting_level -= 1; base_state(env); report_empty_token(ELEMENTDECL_END_TOKEN, env); break; Modified: trunk/faxpp/src/error.c =================================================================== --- 
trunk/faxpp/src/error.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/error.c 2008-03-15 10:59:42 UTC (rev 44) @@ -133,6 +133,8 @@ return "INVALID_ELEMENTDECL_CONTENT"; case INVALID_CONDITIONAL_SECTION: return "INVALID_CONDITIONAL_SECTION"; + case IMPROPER_NESTING_OF_ENTITY: + return "IMPROPER_NESTING_OF_ENTITY"; case NO_ERROR: break; } Modified: trunk/faxpp/src/reference.c =================================================================== --- trunk/faxpp/src/reference.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/reference.c 2008-03-15 10:59:42 UTC (rev 44) @@ -359,6 +359,33 @@ } FAXPP_Error +parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + break; + case ';': + retrieve_state(env); + token_end_position(env); + report_token(PE_REFERENCE_IN_MARKUP_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ENTITY_REFERENCE; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error char_reference_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/token.c 2008-03-15 10:59:42 UTC (rev 44) @@ -54,6 +54,8 @@ return "ENTITY_REFERENCE_TOKEN"; case PE_REFERENCE_TOKEN: return "PE_REFERENCE_TOKEN"; + case PE_REFERENCE_IN_MARKUP_TOKEN: + return "PE_REFERENCE_IN_MARKUP_TOKEN"; case DEC_CHAR_REFERENCE_TOKEN: return "DEC_CHAR_REFERENCE_TOKEN"; case HEX_CHAR_REFERENCE_TOKEN: Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-15 10:59:42 UTC (rev 44) @@ -256,6 +256,8 @@ 
return "entity_reference_state"; else if(state == parameter_entity_reference_state) return "parameter_entity_reference_state"; + else if(state == parameter_entity_reference_in_markup_state) + return "parameter_entity_reference_in_markup_state"; else if(state == char_reference_state) return "char_reference_state"; else if(state == dec_char_reference_state) @@ -498,6 +500,8 @@ return "elementdecl_initial_state4"; else if(state == elementdecl_initial_state5) return "elementdecl_initial_state5"; + else if(state == elementdecl_name_ws_state) + return "elementdecl_name_ws_state"; else if(state == elementdecl_name_state1) return "elementdecl_name_state1"; else if(state == elementdecl_name_state2) @@ -793,6 +797,8 @@ else if(state == paramentitydecl_end_state) return "paramentitydecl_end_state"; + else if(state == conditional_ws_state) + return "conditional_ws_state"; else if(state == conditional_state1) return "conditional_state1"; else if(state == conditional_state2) Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-15 10:59:42 UTC (rev 44) @@ -172,6 +172,7 @@ FAXPP_Error quot_entity_reference_state4(FAXPP_TokenizerEnv *env); FAXPP_Error entity_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error parameter_entity_reference_state(FAXPP_TokenizerEnv *env); +FAXPP_Error parameter_entity_reference_in_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error char_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error dec_char_reference_state(FAXPP_TokenizerEnv *env); FAXPP_Error hex_char_reference_state1(FAXPP_TokenizerEnv *env); @@ -299,6 +300,7 @@ FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_name_ws_state(FAXPP_TokenizerEnv *env); 
FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); @@ -448,6 +450,7 @@ FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error conditional_ws_state(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env); FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env); FAXPP_Error ignore_state1(FAXPP_TokenizerEnv *env); @@ -489,8 +492,8 @@ return BAD_ENCODING; \ } \ \ -/* printf("%03d:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \ -/* FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \ +/* printf("%03d:%03d Tok:%p L:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, (env)->column, */ \ +/* (env), (env)->nesting_level, FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \ /* (env)->current_char); */ \ } @@ -560,6 +563,8 @@ (env)->state = final_state; \ else if((env)->internal_subset) \ (env)->state = internal_subset_state; \ + else if((env)->in_markup_entity) \ + (env)->state = (env)->prev->state; \ else (env)->state = initial_misc_state; \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/xml_parser.c 2008-03-15 10:59:42 UTC (rev 44) @@ -1069,7 +1069,7 @@ case XML_DECL_END_TOKEN: env->next_event = nc_unsupported_encoding_next_event; - if(env->tenv->external_subset) { + if(env->tenv->external_subset || env->tenv->in_markup_entity) { // TBD event for start of external subset - jpcs next = nc_dtd_next_event; } @@ -1131,7 +1131,7 @@ default: env->tenv->buffered_token = 1; p_reset_event(env); - if(env->tenv->external_subset) { + if(env->tenv->external_subset || env->tenv->in_markup_entity) { // TBD 
event for start of external subset - jpcs env->next_event = nc_dtd_next_event; } @@ -1201,14 +1201,6 @@ #define p_compare_text(a, b) (((a)->len == (b)->len) ? memcmp((a)->ptr, (b)->ptr, (a)->len) : ((a)->len - (b)->len)) -/* static int p_compare_text(const FAXPP_Text *a, const FAXPP_Text *b) */ -/* { */ -/* int cmp = a->len - b->len; */ -/* if(cmp != 0) return cmp; */ - -/* return memcmp(a->ptr, b->ptr, a->len); */ -/* } */ - static FAXPP_EntityInfo *p_find_entity_info(const FAXPP_Text *name, FAXPP_EntityInfo *list) { while(list) { @@ -1238,9 +1230,6 @@ env->tenv->line = entv->line; env->tenv->column = entv->column; - if(state == EXTERNAL_PARSED_ENTITY) - env->next_event = nc_start_document_next_event; - // Set the entity on the first new tokenizer if(*initial_entity) { env->tenv->start_of_entity = 1; @@ -1282,8 +1271,13 @@ return err; } +static const char single_space[] = {' '}; + static FAXPP_Error p_parse_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state) { + FAXPP_Error err; + FAXPP_EntityInfo *tmp; + // Check for a recursive entity FAXPP_TokenizerEnv *tokenizer = env->tenv; while(tokenizer) { @@ -1293,18 +1287,47 @@ tokenizer = tokenizer->prev; } + if(state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY) { + // Add a space after the entity inside DTD markup + err = FAXPP_push_entity_tokenizer(&env->tenv, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1); + if(err) return err; + + env->tenv->line = ent->line; + env->tenv->column = ent->column; + + FAXPP_set_tokenizer_decode(env->tenv, FAXPP_utf8_decode); + } + if(ent->external) { switch(state) { case ELEMENT_CONTENT_ENTITY: state = EXTERNAL_PARSED_ENTITY; break; case INTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; case EXTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + case IN_MARKUP_ENTITY: state = EXTERNAL_IN_MARKUP_ENTITY; break; default: break; } - return p_parse_external_entity(env, ent, state); + err = p_parse_external_entity(env, ent, 
state); + if(err) return err; } + else { + tmp = ent; + err = p_parse_internal_entity(env, ent, state, &tmp); + if(err) return err; - return p_parse_internal_entity(env, ent, state, &ent); + if(state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY) { + // Add a space before the entity inside DTD markup + err = FAXPP_push_entity_tokenizer(&env->tenv, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1); + if(err) return err; + + env->tenv->line = ent->line; + env->tenv->column = ent->column; + + FAXPP_set_tokenizer_decode(env->tenv, FAXPP_utf8_decode); + } + } + + return NO_ERROR; } static Char32 p_dec_char_ref_value(const FAXPP_Text *text, FAXPP_ParserEnv *env) @@ -1563,7 +1586,27 @@ if(err) goto error; } break; + case PE_REFERENCE_IN_MARKUP_TOKEN: + // Parameter entity references cannot be forward references - + // so we go ahead and look them up straight away + ent = p_find_entity_info(&env->tenv->result_token.value, env->parameter_entities); + // [VC: Entity Declared] + if(ent == 0) { + err = UNDEFINED_ENTITY; + goto error; + } + p_set_text_from_text(&bkup_system, &env->event.system_id); + p_set_text_from_text(&bkup_public, &env->event.public_id); + + err = p_parse_entity(env, ent, IN_MARKUP_ENTITY); + + p_set_text_from_text(&env->event.system_id, &bkup_system); + p_set_text_from_text(&env->event.public_id, &bkup_public); + + if(err) goto error; + break; + case ELEMENTDECL_LPAR_TOKEN: cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec)); memset(cs, 0, sizeof(FAXPP_ContentSpec)); @@ -1699,6 +1742,7 @@ case PUBID_LITERAL_TOKEN: case NDATA_NAME_TOKEN: case PE_REFERENCE_TOKEN: + case PE_REFERENCE_IN_MARKUP_TOKEN: case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: case ELEMENTDECL_EMPTY_TOKEN: Modified: trunk/faxpp/src/xml_tokenizer.c =================================================================== --- trunk/faxpp/src/xml_tokenizer.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/xml_tokenizer.c 2008-03-15 10:59:42 UTC (rev 44) @@ 
-392,6 +392,7 @@ env->nesting_level = 0; env->elemdecl_content_level = 0; + env->ignore_start_level = 0; env->do_encode = 1; env->seen_doctype = 0; @@ -403,6 +404,7 @@ env->internal_dtd_entity = 0; env->external_dtd_entity = 0; env->external_parsed_entity = 0; + env->in_markup_entity = 0; env->start_of_entity = 0; env->start_of_file = 0; @@ -470,6 +472,7 @@ env->external_dtd_entity = state == EXTERNAL_DTD_ENTITY; env->external_parsed_entity = state == EXTERNAL_PARSED_ENTITY; env->external_subset = state == EXTERNAL_SUBSET_ENTITY; + env->in_markup_entity = state == IN_MARKUP_ENTITY || state == EXTERNAL_IN_MARKUP_ENTITY; FAXPP_set_tokenizer_decode(env, env->prev->transcoder.decode); @@ -491,8 +494,12 @@ case EXTERNAL_DTD_ENTITY: env->state = external_subset_state; break; + case IN_MARKUP_ENTITY: + env->state = env->prev->state; + break; case EXTERNAL_PARSED_ENTITY: case EXTERNAL_SUBSET_ENTITY: + case EXTERNAL_IN_MARKUP_ENTITY: env->state = initial_state; break; } @@ -515,7 +522,11 @@ *list = env->prev; if(env->start_of_entity) { - if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 || + if(env->in_markup_entity) { + if(env->nesting_level != 0 || env->elemdecl_content_level != 0) + err = IMPROPER_NESTING_OF_ENTITY; + } + else if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 || (env->element_entity && env->state != parsed_entity_state && env->state != default_element_content_rsquare_state1 && env->state != default_element_content_rsquare_state2) || @@ -527,7 +538,8 @@ err = INCOMPLETE_MARKUP_IN_ENTITY_VALUE; } } - else { + + if(env->in_markup_entity || !env->start_of_entity) { // Force the old tokenizer token to point into the token buffer FAXPP_tokenizer_release_buffer(env, 0); Modified: trunk/faxpp/src/xml_tokenizer.h =================================================================== --- trunk/faxpp/src/xml_tokenizer.h 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/xml_tokenizer.h 
2008-03-15 10:59:42 UTC (rev 44) @@ -52,6 +52,7 @@ unsigned int nesting_level; unsigned int elemdecl_content_level; + unsigned int ignore_start_level; unsigned int do_encode:1; unsigned int buffer_done:1; @@ -64,6 +65,7 @@ unsigned int internal_dtd_entity:1; unsigned int external_dtd_entity:1; unsigned int external_parsed_entity:1; + unsigned int in_markup_entity:1; unsigned int normalize_attrs:1; unsigned int user_provided_decode:1; @@ -99,14 +101,16 @@ struct FAXPP_TokenizerEnv_s *prev; }; -// The first two values are the same as the values in FAXPP_EntityType +// The first three values are the same as the values in FAXPP_EntityType typedef enum { EXTERNAL_PARSED_ENTITY2 = EXTERNAL_PARSED_ENTITY, EXTERNAL_SUBSET_ENTITY2 = EXTERNAL_SUBSET_ENTITY, + EXTERNAL_IN_MARKUP_ENTITY2 = EXTERNAL_IN_MARKUP_ENTITY, ELEMENT_CONTENT_ENTITY, INTERNAL_DTD_ENTITY, EXTERNAL_DTD_ENTITY, + IN_MARKUP_ENTITY, ATTRIBUTE_VALUE_ENTITY } FAXPP_EntityParseState; Modified: trunk/faxpp/src/xmldecl.c =================================================================== --- trunk/faxpp/src/xmldecl.c 2008-03-14 15:24:54 UTC (rev 43) +++ trunk/faxpp/src/xmldecl.c 2008-03-15 10:59:42 UTC (rev 44) @@ -16,6 +16,7 @@ #include "tokenizer_states.h" #include "char_classes.h" +#include "xml_parser.h" FAXPP_Error xml_decl_or_markup_state(FAXPP_TokenizerEnv *env) @@ -29,7 +30,10 @@ token_start_position(env); break; case '!': - if(env->external_subset) + // TBD Do this in all other places where it's not an XMLDecl - jpcs + if(env->in_markup_entity) + return INVALID_DTD_DECL; // TBD is this right? 
- jpcs + else if(env->external_subset) env->state = external_subset_markup_state; else env->state = initial_markup_state; @@ -164,7 +168,7 @@ env->state = xml_decl_version_state2; break; case 'e': - if(env->external_parsed_entity || env->external_subset) { + if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) { env->state = xml_decl_encoding_state2; break; } @@ -330,13 +334,13 @@ WHITESPACE: break; case '?': - if(env->external_parsed_entity || env->external_subset) goto invalid_char; + if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) goto invalid_char; env->state = xml_decl_seen_question_state; token_start_position(env); break; case 's': - if(env->external_parsed_entity || env->external_subset) goto invalid_char; + if(env->external_parsed_entity || env->external_subset || env->in_markup_entity) goto invalid_char; env->state = xml_decl_standalone_state2; break; @@ -524,7 +528,7 @@ next_char(env); break; case 's': - if(!env->external_parsed_entity && !env->external_subset) { + if(!env->external_parsed_entity && !env->external_subset && !env->in_markup_entity) { env->state = xml_decl_standalone_state2; next_char(env); break; @@ -698,17 +702,42 @@ return NO_ERROR; } +static const char single_space[] = {' '}; + FAXPP_Error xml_decl_seen_question_state(FAXPP_TokenizerEnv *env) { + FAXPP_Error err; + FAXPP_TokenizerEnv *tok; + read_char(env); switch(env->current_char) { case '>': base_state(env); + report_empty_token(XML_DECL_END_TOKEN, env); next_char(env); token_start_position(env); + + if(env->in_markup_entity) { + // Add a space before the entity inside DTD markup + err = FAXPP_push_entity_tokenizer(&env, IN_MARKUP_ENTITY, (void*)single_space, 1, /*done*/1); + if(err) return err; + + tok = env; + while(tok && tok->entity == 0) { + tok = tok->prev; + } + + if(tok) { + env->line = tok->entity->line; + env->column = tok->entity->column; + } + + FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode); + } + break; 
LINE_ENDINGS default: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-14 15:24:58
|
Revision: 43 http://faxpp.svn.sourceforge.net/faxpp/?rev=43&view=rev Author: jpcs Date: 2008-03-14 08:24:54 -0700 (Fri, 14 Mar 2008) Log Message: ----------- Added the ability to parse conditional sections in external DTDs. Added tokens for attlist declarations. Fixed a few memory bugs shown by valgrind. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/doctype.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/src/conditional.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/Makefile.am 2008-03-14 15:24:54 UTC (rev 43) @@ -36,7 +36,8 @@ src/elementdecl.c \ src/attlistdecl.c \ src/notationdecl.c \ -src/entitydecl.c +src/entitydecl.c \ +src/conditional.c tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/Makefile.in 2008-03-14 15:24:54 UTC (rev 43) @@ -64,7 +64,7 @@ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \ - notationdecl.lo entitydecl.lo + notationdecl.lo entitydecl.lo conditional.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ 
-259,7 +259,8 @@ src/elementdecl.c \ src/attlistdecl.c \ src/notationdecl.c \ -src/entitydecl.c +src/entitydecl.c \ +src/conditional.c tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c @@ -394,6 +395,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cdata.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/char_classes.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/comment.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/conditional.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ @@ -576,6 +578,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c +conditional.lo: src/conditional.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT conditional.lo -MD -MP -MF $(DEPDIR)/conditional.Tpo -c -o conditional.lo `test -f 'src/conditional.c' || echo '$(srcdir)/'`src/conditional.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/conditional.Tpo $(DEPDIR)/conditional.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/conditional.c' object='conditional.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) 
-c -o conditional.lo `test -f 'src/conditional.c' || echo '$(srcdir)/'`src/conditional.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/include/faxpp/error.h 2008-03-14 15:24:54 UTC (rev 43) @@ -68,6 +68,7 @@ INVALID_ATTRIBUTE_TYPE, INVALID_DEFAULTDECL, INVALID_ELEMENTDECL_CONTENT, + INVALID_CONDITIONAL_SECTION, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/include/faxpp/token.h 2008-03-14 15:24:54 UTC (rev 43) @@ -91,8 +91,19 @@ ATTLISTDECL_NAME_TOKEN, ATTLISTDECL_ATTDEF_PREFIX_TOKEN, ATTLISTDECL_ATTDEF_NAME_TOKEN, + ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, + ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN, + ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, + ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, + ATTLISTDECL_ATTTYPE_ID_TOKEN, + ATTLISTDECL_ATTTYPE_IDREF_TOKEN, + ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, + ATTLISTDECL_ATTTYPE_CDATA_TOKEN, ATTLISTDECL_NOTATION_NAME_TOKEN, ATTLISTDECL_ENUMERATION_NAME_TOKEN, + ATTLISTDECL_DEFAULT_IMPLIED_TOKEN, + ATTLISTDECL_DEFAULT_REQUIRED_TOKEN, + ATTLISTDECL_DEFAULT_FIXED_TOKEN, ATTLISTDECL_END_TOKEN, NOTATIONDECL_NAME_TOKEN, Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-03-13 21:56:47 UTC (rev 42) +++ 
trunk/faxpp/src/attlistdecl.c 2008-03-14 15:24:54 UTC (rev 43) @@ -17,7 +17,7 @@ #include "tokenizer_states.h" #include "char_classes.h" -#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +#define SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, return_token) \ FAXPP_Error \ name(FAXPP_TokenizerEnv *env) \ { \ @@ -27,6 +27,7 @@ case (ch): \ if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ env->state = (next_state); \ + if((return_token) != NO_TOKEN) { report_empty_token((return_token), env); } \ next_char(env); \ break; \ LINE_ENDINGS \ @@ -37,6 +38,8 @@ return NO_ERROR; \ } +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, NO_TOKEN) + SINGLE_CHAR_STATE(attlistdecl_initial_state1, 'T', 0, attlistdecl_initial_state2, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state2, 'T', 0, attlistdecl_initial_state3, INVALID_DTD_DECL) SINGLE_CHAR_STATE(attlistdecl_initial_state3, 'L', 0, attlistdecl_initial_state4, INVALID_DTD_DECL) @@ -316,9 +319,9 @@ switch(env->current_char) { case 'Y': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_ENTITY_TOKEN, env); break; case 'I': env->state = attlistdecl_atttype_entities_state1; @@ -333,10 +336,8 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN) FAXPP_Error attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) @@ -373,14 +374,14 @@ 
switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN, env); break; case 'S': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_plus_state; + report_empty_token(ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN, env); break; default: next_char(env); @@ -391,8 +392,6 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state1, 'T', 0, attlistdecl_atttype_notation_state2, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state2, 'A', 0, attlistdecl_atttype_notation_state3, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE) @@ -504,9 +503,9 @@ switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_ID_TOKEN, env); break; case 'R': env->state = attlistdecl_atttype_idref_state1; @@ -530,14 +529,14 @@ switch(env->current_char) { WHITESPACE: - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_state; + report_empty_token(ATTLISTDECL_ATTTYPE_IDREF_TOKEN, env); break; case 'S': - // TBD Tokens for these - jpcs env->stored_state = attlistdecl_default_state1; env->state = ws_plus_state; + report_empty_token(ATTLISTDECL_ATTTYPE_IDREFS_TOKEN, env); break; default: next_char(env); @@ -548,12 +547,10 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE) SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, 
attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE) -SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE_RETURN(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE, ATTLISTDECL_ATTTYPE_CDATA_TOKEN) FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) @@ -688,14 +685,12 @@ return NO_ERROR; } -// TBD Tokens for these - jpcs - SINGLE_CHAR_STATE(attlistdecl_default_implied_state1, 'M', 0, attlistdecl_default_implied_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state2, 'P', 0, attlistdecl_default_implied_state3, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_IMPLIED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL) @@ -703,12 +698,12 @@ SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL) 
-SINGLE_CHAR_STATE(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_REQUIRED_TOKEN) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL) SINGLE_CHAR_STATE(attlistdecl_default_fixed_state3, 'E', 0, attlistdecl_default_fixed_state4, INVALID_DEFAULTDECL) -SINGLE_CHAR_STATE(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE_RETURN(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL, ATTLISTDECL_DEFAULT_FIXED_TOKEN) FAXPP_Error attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env) Added: trunk/faxpp/src/conditional.c =================================================================== --- trunk/faxpp/src/conditional.c (rev 0) +++ trunk/faxpp/src/conditional.c 2008-03-14 15:24:54 UTC (rev 43) @@ -0,0 +1,197 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(conditional_state1, 'I', 0, conditional_state2, INVALID_CONDITIONAL_SECTION) + +FAXPP_Error +conditional_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'N': + env->state = include_state1; + env->nesting_level += 1; + break; + case 'G': + env->state = ignore_state1; + env->nesting_level += 1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_CONDITIONAL_SECTION; + } + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(ignore_state1, 'N', 0, ignore_state2, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state2, 'O', 0, ignore_state3, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state3, 'R', 0, ignore_state4, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state4, 'E', ignore_state5, ws_state, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(ignore_state5, '[', 0, ignore_content_state, INVALID_CONDITIONAL_SECTION) + +FAXPP_Error +ignore_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '<': + env->state = ignore_content_seen_lt_state; + break; + case ']': + env->state = ignore_content_seen_rsquare_state1; + break; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_lt_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + 
switch(env->current_char) { + case '!': + env->state = ignore_content_seen_bang_state; + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_bang_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + env->state = ignore_content_state; + + switch(env->current_char) { + case '[': + env->nesting_level += 1; + break; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_rsquare_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + env->state = ignore_content_seen_rsquare_state2; + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +ignore_content_seen_rsquare_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + break; + case '>': + env->nesting_level -= 1; + if(env->nesting_level == 0) { + base_state(env); + } + else { + env->state = ignore_content_state; + } + break; + LINE_ENDINGS + default: + env->state = ignore_content_state; + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(include_state1, 'C', 0, include_state2, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state2, 'L', 0, include_state3, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state3, 'U', 0, include_state4, 
INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state4, 'D', 0, include_state5, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state5, 'E', include_state6, ws_state, INVALID_CONDITIONAL_SECTION) +SINGLE_CHAR_STATE(include_state6, '[', 0, external_subset_state, INVALID_CONDITIONAL_SECTION) Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/doctype.c 2008-03-14 15:24:54 UTC (rev 43) @@ -557,7 +557,7 @@ break; default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -583,7 +583,7 @@ break; default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -681,9 +681,16 @@ case '<': env->state = external_subset_markup_state; break; + case ']': + // Check if we're in an include section + if(env->nesting_level != 0) { + env->state = external_subset_seen_rsquare_state1; + break; + } + // Fall through default: next_char(env); - return INVALID_DOCTYPE_DECL; + return INVALID_DTD_DECL; } next_char(env); @@ -691,6 +698,48 @@ } FAXPP_Error +external_subset_seen_rsquare_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + env->state = external_subset_seen_rsquare_state2; + break; + default: + base_state(env); + // No next_char + return INVALID_DTD_DECL; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +external_subset_seen_rsquare_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ']': + next_char(env); + return INVALID_DTD_DECL; + case '>': + env->nesting_level -= 1; + base_state(env); + break; + default: + base_state(env); + // No next_char + return INVALID_DTD_DECL; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env) { read_char(env); @@ -722,9 +771,10 @@ case '-': env->state = 
comment_start_state2; break; -/* // TBD conditional sections - jpcs */ -/* case '[': */ -/* break; */ + case '[': + env->stored_state = conditional_state1; + env->state = ws_state; + break; case 'E': env->state = elementdecl_or_entitydecl_state; break; Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/elementdecl.c 2008-03-14 15:24:54 UTC (rev 43) @@ -178,7 +178,7 @@ env->state = elementdecl_any_state1; break; case '(': - env->nesting_level += 1; + env->elemdecl_content_level += 1; env->stored_state = elementdecl_mixed_or_children_state; env->state = ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); @@ -226,7 +226,7 @@ switch(env->current_char) { case '(': - env->nesting_level += 1; + env->elemdecl_content_level += 1; env->stored_state = elementdecl_cp_name_state1; env->state = ws_state; report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); @@ -359,7 +359,7 @@ { read_char(env); - if(env->nesting_level == 0) + if(env->elemdecl_content_level == 0) env->stored_state = elementdecl_end_state; else env->stored_state = elementdecl_cp_separator_or_end_state; @@ -401,7 +401,7 @@ report_empty_token(ELEMENTDECL_COMMA_TOKEN, env); break; case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_cp_cardinality_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; @@ -428,7 +428,7 @@ switch(env->current_char) { case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_pcdata_optional_star_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; @@ -471,7 +471,7 @@ switch(env->current_char) { case ')': - env->nesting_level -= 1; + env->elemdecl_content_level -= 1; env->state = elementdecl_pcdata_star_state; report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); break; Modified: trunk/faxpp/src/error.c 
=================================================================== --- trunk/faxpp/src/error.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/error.c 2008-03-14 15:24:54 UTC (rev 43) @@ -131,6 +131,8 @@ return "INVALID_DEFAULTDECL"; case INVALID_ELEMENTDECL_CONTENT: return "INVALID_ELEMENTDECL_CONTENT"; + case INVALID_CONDITIONAL_SECTION: + return "INVALID_CONDITIONAL_SECTION"; case NO_ERROR: break; } Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/token.c 2008-03-14 15:24:54 UTC (rev 43) @@ -134,10 +134,32 @@ return "ATTLISTDECL_ATTDEF_PREFIX_TOKEN"; case ATTLISTDECL_ATTDEF_NAME_TOKEN: return "ATTLISTDECL_ATTDEF_NAME_TOKEN"; + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + return "ATTLISTDECL_ATTTYPE_ENTITY_TOKEN"; + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + return "ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN"; + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + return "ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN"; + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + return "ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN"; + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + return "ATTLISTDECL_ATTTYPE_ID_TOKEN"; + case ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + return "ATTLISTDECL_ATTTYPE_IDREF_TOKEN"; + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + return "ATTLISTDECL_ATTTYPE_IDREFS_TOKEN"; + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: + return "ATTLISTDECL_ATTTYPE_CDATA_TOKEN"; case ATTLISTDECL_NOTATION_NAME_TOKEN: return "ATTLISTDECL_NOTATION_NAME_TOKEN"; case ATTLISTDECL_ENUMERATION_NAME_TOKEN: return "ATTLISTDECL_ENUMERATION_NAME_TOKEN"; + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + return "ATTLISTDECL_DEFAULT_IMPLIED_TOKEN"; + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + return "ATTLISTDECL_DEFAULT_REQUIRED_TOKEN"; + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: + return "ATTLISTDECL_DEFAULT_FIXED_TOKEN"; case ATTLISTDECL_END_TOKEN: return "ATTLISTDECL_END_TOKEN"; Modified: trunk/faxpp/src/tokenizer_states.c 
=================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-14 15:24:54 UTC (rev 43) @@ -435,6 +435,10 @@ else if(state == external_subset_state) return "external_subset_state"; + else if(state == external_subset_seen_rsquare_state1) + return "external_subset_seen_rsquare_state1"; + else if(state == external_subset_seen_rsquare_state2) + return "external_subset_seen_rsquare_state2"; else if(state == external_subset_markup_state) return "external_subset_markup_state"; else if(state == external_subset_decl_state) @@ -789,6 +793,43 @@ else if(state == paramentitydecl_end_state) return "paramentitydecl_end_state"; + else if(state == conditional_state1) + return "conditional_state1"; + else if(state == conditional_state2) + return "conditional_state2"; + else if(state == ignore_state1) + return "ignore_state1"; + else if(state == ignore_state2) + return "ignore_state2"; + else if(state == ignore_state3) + return "ignore_state3"; + else if(state == ignore_state4) + return "ignore_state4"; + else if(state == ignore_state5) + return "ignore_state5"; + else if(state == ignore_content_state) + return "ignore_content_state"; + else if(state == ignore_content_seen_lt_state) + return "ignore_content_seen_lt_state"; + else if(state == ignore_content_seen_bang_state) + return "ignore_content_seen_bang_state"; + else if(state == ignore_content_seen_rsquare_state1) + return "ignore_content_seen_rsquare_state1"; + else if(state == ignore_content_seen_rsquare_state2) + return "ignore_content_seen_rsquare_state2"; + else if(state == include_state1) + return "include_state1"; + else if(state == include_state2) + return "include_state2"; + else if(state == include_state3) + return "include_state3"; + else if(state == include_state4) + return "include_state4"; + else if(state == include_state5) + return "include_state5"; + else if(state == include_state6) + 
return "include_state6"; + return "unknown"; } #endif Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-14 15:24:54 UTC (rev 43) @@ -266,6 +266,8 @@ FAXPP_Error internal_subset_decl_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_state(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_seen_rsquare_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_seen_rsquare_state2(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env); @@ -446,7 +448,26 @@ FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_lt_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_bang_state(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_rsquare_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error ignore_content_seen_rsquare_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error include_state6(FAXPP_TokenizerEnv *env); + /********************* * * Tokenizer Helper 
Functions @@ -527,20 +548,18 @@ #define base_state(env) \ { \ - if((env)->nesting_level != 0) \ + if((env)->external_subset || (env)->external_dtd_entity) \ + (env)->state = external_subset_state; \ + else if((env)->nesting_level != 0) \ (env)->state = (env)->element_content_state; \ - else if((env)->element_entity) \ - (env)->state = parsed_entity_state; \ else if((env)->internal_dtd_entity) \ (env)->state = internal_subset_state_en; \ - else if((env)->external_parsed_entity) \ + else if((env)->element_entity || (env)->external_parsed_entity) \ (env)->state = parsed_entity_state; \ else if((env)->seen_doc_element) \ (env)->state = final_state; \ else if((env)->internal_subset) \ (env)->state = internal_subset_state; \ - else if((env)->external_subset) \ - (env)->state = external_subset_state; \ else (env)->state = initial_misc_state; \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_parser.c 2008-03-14 15:24:54 UTC (rev 43) @@ -111,9 +111,16 @@ FAXPP_AttrValue *at; FAXPP_ElementInfo *el; FAXPP_NamespaceInfo *ns; + FAXPP_ContentSpec *cs; if(env->attrs) free(env->attrs); + while(env->current_elementdecl) { + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + } + while(env->av_dealloc) { at = env->av_dealloc; env->av_dealloc = at->dealloc_next; @@ -248,10 +255,23 @@ { FAXPP_ElementInfo *el; FAXPP_NamespaceInfo *ns; + FAXPP_ContentSpec *cs; env->tenv->buffered_token = 0; env->tenv->user_provided_decode = 0; + // Free the elementdecl stack + while(env->current_elementdecl) { + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + } + + env->current_attr = 0; + env->current_entity = 0; + env->current_attlist = 0; + env->current_notation = 0; + // Put the element info objects back in the pool while(env->element_info_stack) { el = env->element_info_stack; @@ 
-408,13 +428,15 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// -static void p_text_change_buffer(FAXPP_Buffer *buffer, void *newBuffer, FAXPP_Text *text) +static void p_change_buffer(FAXPP_Buffer *buffer, void *newBuffer, void **text) { - if(text->ptr >= buffer->buffer && text->ptr < (buffer->buffer + buffer->length)) { - text->ptr += newBuffer - buffer->buffer; + if(*text >= buffer->buffer && *text < (buffer->buffer + buffer->length)) { + *text += newBuffer - buffer->buffer; } } +#define p_text_change_buffer(buffer, newBuffer, text) p_change_buffer((buffer), (newBuffer), &(text)->ptr) + static void p_change_event_buffer(void *userData, FAXPP_Buffer *buffer, void *newBuffer) { unsigned int i; @@ -487,13 +509,19 @@ while(tokenizer) { p_text_change_buffer(buffer, newBuffer, &tokenizer->base_uri); + // The tokenizer buffer can also point into the entity_buffer, so change that too + p_change_buffer(buffer, newBuffer, &tokenizer->buffer); + p_change_buffer(buffer, newBuffer, &tokenizer->buffer_end); + p_change_buffer(buffer, newBuffer, &tokenizer->position); + tokenizer = tokenizer->prev; } } #define p_move_text_to_buffer(env, text, buf) \ { \ - if((text)->ptr >= (env)->tenv->buffer && (text)->ptr < (env)->tenv->buffer_end) { \ + if((text)->ptr >= (env)->tenv->buffer && (text)->ptr < (env)->tenv->buffer_end && \ + ((text)->ptr < (buf)->buffer || (text)->ptr >= ((buf)->buffer + (buf)->length))) { \ void *newPtr = (buf)->cursor; \ FAXPP_Error err = FAXPP_buffer_append((buf), (text)->ptr, (text)->len); \ if((env)->tenv->null_terminate && err == 0) \ @@ -915,10 +943,10 @@ unsigned int readlen; FAXPP_Error err; + err = FAXPP_release_buffer(env, 0); + if(err != 0) return err; + if(env->tenv->read && !env->tenv->buffer_done) { - err = FAXPP_release_buffer(env, 0); - if(err != 0) return err; - if(env->tenv->position < env->tenv->buffer_end) { // We're half way through a charcter, so we need to copy // the partial char to 
the begining of the buffer to keep @@ -942,7 +970,7 @@ if(env->tenv->attr_entity) { // TBD default attr values - jpcs if(!env->tenv->prev->internal_subset && !env->tenv->prev->external_subset && - !env->tenv->prev->internal_dtd_entity) { + !env->tenv->prev->internal_dtd_entity && !env->tenv->prev->external_dtd_entity) { err = p_set_attr_value_name_from_entity(env->current_attr, env, ENTITY_REFERENCE_END_EVENT, env->tenv->entity); if(err) return err; } @@ -1266,7 +1294,14 @@ } if(ent->external) { - return p_parse_external_entity(env, ent, state - INTERNAL_DIFF); + switch(state) { + case ELEMENT_CONTENT_ENTITY: state = EXTERNAL_PARSED_ENTITY; break; + case INTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + case EXTERNAL_DTD_ENTITY: state = EXTERNAL_SUBSET_ENTITY; break; + default: break; + } + + return p_parse_external_entity(env, ent, state); } return p_parse_internal_entity(env, ent, state, &ent); @@ -1314,6 +1349,7 @@ { FAXPP_EntityInfo *ent; FAXPP_EntityValue *entv; + FAXPP_ContentSpec *cs; FAXPP_Text bkup_system, bkup_public; Char32 ch; FAXPP_Error err = 0; @@ -1467,6 +1503,7 @@ entv->value.len = env->entity_buffer.cursor - entv->value.ptr; } else if(env->current_attlist) { + // General entities in ATTLIST values should be looked up straight away ent = p_find_entity_info(&env->tenv->result_token.value, env->general_entities); if(ent == 0) { err = UNDEFINED_ENTITY; @@ -1518,7 +1555,7 @@ p_set_text_from_text(&bkup_system, &env->event.system_id); p_set_text_from_text(&bkup_public, &env->event.public_id); - err = p_parse_entity(env, ent, INTERNAL_DTD_ENTITY); + err = p_parse_entity(env, ent, env->tenv->internal_subset ? 
INTERNAL_DTD_ENTITY : EXTERNAL_DTD_ENTITY); p_set_text_from_text(&env->event.system_id, &bkup_system); p_set_text_from_text(&env->event.public_id, &bkup_public); @@ -1527,6 +1564,36 @@ } break; + case ELEMENTDECL_LPAR_TOKEN: + cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec)); + memset(cs, 0, sizeof(FAXPP_ContentSpec)); + cs->parent = env->current_elementdecl; + env->current_elementdecl = cs; + break; + case ELEMENTDECL_RPAR_TOKEN: + cs = env->current_elementdecl; + env->current_elementdecl = cs->parent; + free(cs); + break; + case ELEMENTDECL_BAR_TOKEN: + if(env->current_elementdecl->type == CONTENTSPEC_NONE) { + env->current_elementdecl->type = CONTENTSPEC_CHOICE; + } + else if(env->current_elementdecl->type != CONTENTSPEC_CHOICE) { + err = INVALID_ELEMENTDECL_CONTENT; + goto error; + } + break; + case ELEMENTDECL_COMMA_TOKEN: + if(env->current_elementdecl->type == CONTENTSPEC_NONE) { + env->current_elementdecl->type = CONTENTSPEC_SEQUENCE; + } + else if(env->current_elementdecl->type != CONTENTSPEC_SEQUENCE) { + err = INVALID_ELEMENTDECL_CONTENT; + goto error; + } + break; + case ATTLISTDECL_PREFIX_TOKEN: case ATTLISTDECL_NAME_TOKEN: env->current_attlist = 1; @@ -1560,24 +1627,32 @@ case DOCTYPE_NAME_TOKEN: p_copy_text_from_token(&env->event.name, env, /*useTokenBuffer*/0); break; + case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: case ELEMENTDECL_EMPTY_TOKEN: case ELEMENTDECL_ANY_TOKEN: case ELEMENTDECL_PCDATA_TOKEN: - case ELEMENTDECL_LPAR_TOKEN: - case ELEMENTDECL_RPAR_TOKEN: case ELEMENTDECL_QUESTION_TOKEN: case ELEMENTDECL_STAR_TOKEN: case ELEMENTDECL_PLUS_TOKEN: - case ELEMENTDECL_BAR_TOKEN: - case ELEMENTDECL_COMMA_TOKEN: case ELEMENTDECL_END_TOKEN: case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: case ATTLISTDECL_ATTDEF_NAME_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + case 
ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: case ATTLISTDECL_NOTATION_NAME_TOKEN: case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: // Ignore for now break; case COMMENT_TOKEN: @@ -1641,8 +1716,19 @@ case ATTLISTDECL_NAME_TOKEN: case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: case ATTLISTDECL_ATTDEF_NAME_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITY_TOKEN: + case ATTLISTDECL_ATTTYPE_ENTITIES_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKEN_TOKEN: + case ATTLISTDECL_ATTTYPE_NMTOKENS_TOKEN: + case ATTLISTDECL_ATTTYPE_ID_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREF_TOKEN: + case ATTLISTDECL_ATTTYPE_IDREFS_TOKEN: + case ATTLISTDECL_ATTTYPE_CDATA_TOKEN: case ATTLISTDECL_NOTATION_NAME_TOKEN: case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + case ATTLISTDECL_DEFAULT_IMPLIED_TOKEN: + case ATTLISTDECL_DEFAULT_REQUIRED_TOKEN: + case ATTLISTDECL_DEFAULT_FIXED_TOKEN: case ATTLISTDECL_END_TOKEN: case NOTATIONDECL_NAME_TOKEN: case NOTATIONDECL_END_TOKEN: Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_parser.h 2008-03-14 15:24:54 UTC (rev 43) @@ -75,6 +75,19 @@ FAXPP_EntityInfo *next; }; +typedef enum { + CONTENTSPEC_NONE = 0, + CONTENTSPEC_SEQUENCE, + CONTENTSPEC_CHOICE +} FAXPP_ContentSpecType; + +typedef struct FAXPP_ContentSpec_s FAXPP_ContentSpec; + +struct FAXPP_ContentSpec_s { + FAXPP_ContentSpecType type; + FAXPP_ContentSpec *parent; +}; + typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv; typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env); @@ -96,8 +109,10 @@ unsigned int max_attr_count; FAXPP_Attribute *attrs; + FAXPP_Attribute *current_attr; FAXPP_EntityInfo *current_entity; + FAXPP_ContentSpec *current_elementdecl; unsigned int current_attlist:1; unsigned int 
current_notation:1; Modified: trunk/faxpp/src/xml_tokenizer.c =================================================================== --- trunk/faxpp/src/xml_tokenizer.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_tokenizer.c 2008-03-14 15:24:54 UTC (rev 43) @@ -32,6 +32,8 @@ #define INITIAL_TOKEN_BUFFER_SIZE 64 +#define SNIFF_NEXT_CHAR(buf) (((buf) < (unsigned char*)env->buffer_end) ? *(buf)++ : 0x100) + FAXPP_Error FAXPP_sniff_encoding(FAXPP_Tokenizer *env) { @@ -41,13 +43,13 @@ /* printf("First bytes: %02X %02X %02X %02X\n", *buf, *(buf + 1), */ /* *(buf + 2), *(buf + 3)); */ - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x3C: /* 00 00 00 3C UCS-4, big-endian machine (1234 order) */ #ifdef WORDS_BIGENDIAN @@ -59,14 +61,14 @@ } break; case 0x3C: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 00 00 3C 00 UCS-4, unusual octet order (2143) */ return UNSUPPORTED_ENCODING; } break; case 0xFE: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFF: /* 00 00 FE FF UCS-4, big-endian machine (1234 order) */ #ifdef WORDS_BIGENDIAN @@ -80,7 +82,7 @@ } break; case 0xFF: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFE: /* 00 00 FF FE UCS-4, unusual octet order (2143) */ return UNSUPPORTED_ENCODING; @@ -89,9 +91,9 @@ } break; case 0x3C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 00 3C 00 00 UCS-4, unusual octet order (3412) */ return UNSUPPORTED_ENCODING; @@ -110,11 +112,11 @@ } break; case 0x3C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 3C 00 00 00 UCS-4, little-endian machine (4321 order) */ #ifdef 
WORDS_BIGENDIAN @@ -126,7 +128,7 @@ } break; case 0x3F: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* 3C 00 3F 00 UTF-16, little-endian */ #ifdef WORDS_BIGENDIAN @@ -140,9 +142,9 @@ } break; case 0x3F: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x78: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x6D: /* 3C 3F 78 6D UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, etc. */ FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode); @@ -154,11 +156,11 @@ } break; case 0x4C: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x6F: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xA7: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x94: /* 4C 6F A7 94 EBCDIC */ return UNSUPPORTED_ENCODING; @@ -169,9 +171,9 @@ } break; case 0xEF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xBB: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xBF: /* EF BB BF UTF-8 with byte order mark */ FAXPP_set_tokenizer_decode(env, FAXPP_utf8_decode); @@ -182,11 +184,11 @@ } break; case 0xFE: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* FE FF 00 00 UCS-4, unusual octet order (3412) */ return UNSUPPORTED_ENCODING; @@ -217,11 +219,11 @@ } break; case 0xFF: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0xFE: - switch(*buf++) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: - switch(*buf) { + switch(SNIFF_NEXT_CHAR(buf)) { case 0x00: /* FF FE 00 00 UCS-4, little-endian machine (4321 order) */ #ifdef WORDS_BIGENDIAN @@ -389,6 +391,7 @@ env->column = 0; env->nesting_level = 0; + env->elemdecl_content_level = 0; env->do_encode = 1; env->seen_doctype = 0; @@ -398,6 +401,7 @@ env->element_entity = 0; env->attr_entity = 0; env->internal_dtd_entity = 0; + env->external_dtd_entity = 0; env->external_parsed_entity = 0; env->start_of_entity = 0; @@ -463,6 +467,7 @@ 
env->element_entity = state == ELEMENT_CONTENT_ENTITY; env->attr_entity = state == ATTRIBUTE_VALUE_ENTITY; env->internal_dtd_entity = state == INTERNAL_DTD_ENTITY; + env->external_dtd_entity = state == EXTERNAL_DTD_ENTITY; env->external_parsed_entity = state == EXTERNAL_PARSED_ENTITY; env->external_subset = state == EXTERNAL_SUBSET_ENTITY; @@ -483,6 +488,9 @@ case INTERNAL_DTD_ENTITY: env->state = internal_subset_state_en; break; + case EXTERNAL_DTD_ENTITY: + env->state = external_subset_state; + break; case EXTERNAL_PARSED_ENTITY: case EXTERNAL_SUBSET_ENTITY: env->state = initial_state; @@ -502,17 +510,21 @@ FAXPP_Error FAXPP_pop_tokenizer(FAXPP_Tokenizer **list) { + FAXPP_Error err = NO_ERROR; FAXPP_TokenizerEnv *env = *list; *list = env->prev; if(env->start_of_entity) { - if(env->stored_state != 0 || env->nesting_level != 0 || + if(env->stored_state != 0 || env->nesting_level != 0 || env->elemdecl_content_level != 0 || (env->element_entity && env->state != parsed_entity_state && env->state != default_element_content_rsquare_state1 && env->state != default_element_content_rsquare_state2) || - (env->internal_dtd_entity && env->state != internal_subset_state_en) + (env->internal_dtd_entity && env->state != internal_subset_state_en) || + (env->external_dtd_entity && env->state != external_subset_state && + env->state != external_subset_seen_rsquare_state1 && + env->state != external_subset_seen_rsquare_state2) ) { - return INCOMPLETE_MARKUP_IN_ENTITY_VALUE; + err = INCOMPLETE_MARKUP_IN_ENTITY_VALUE; } } else { @@ -532,6 +544,7 @@ } (*list)->nesting_level += env->nesting_level; + (*list)->elemdecl_content_level += env->elemdecl_content_level; (*list)->state = env->state; (*list)->stored_state = env->stored_state; @@ -541,7 +554,7 @@ free_tokenizer_internal(env); - return NO_ERROR; + return err; } FAXPP_Error Modified: trunk/faxpp/src/xml_tokenizer.h =================================================================== --- trunk/faxpp/src/xml_tokenizer.h 2008-03-13 
21:56:47 UTC (rev 42) +++ trunk/faxpp/src/xml_tokenizer.h 2008-03-14 15:24:54 UTC (rev 43) @@ -51,6 +51,7 @@ unsigned int column; unsigned int nesting_level; + unsigned int elemdecl_content_level; unsigned int do_encode:1; unsigned int buffer_done:1; @@ -61,6 +62,7 @@ unsigned int element_entity:1; unsigned int attr_entity:1; unsigned int internal_dtd_entity:1; + unsigned int external_dtd_entity:1; unsigned int external_parsed_entity:1; unsigned int normalize_attrs:1; @@ -97,15 +99,14 @@ struct FAXPP_TokenizerEnv_s *prev; }; -#define INTERNAL_DIFF 5 - // The first two values are the same as the values in FAXPP_EntityType typedef enum { EXTERNAL_PARSED_ENTITY2 = EXTERNAL_PARSED_ENTITY, EXTERNAL_SUBSET_ENTITY2 = EXTERNAL_SUBSET_ENTITY, - ELEMENT_CONTENT_ENTITY = EXTERNAL_PARSED_ENTITY + INTERNAL_DIFF, - INTERNAL_DTD_ENTITY = EXTERNAL_SUBSET_ENTITY + INTERNAL_DIFF, + ELEMENT_CONTENT_ENTITY, + INTERNAL_DTD_ENTITY, + EXTERNAL_DTD_ENTITY, ATTRIBUTE_VALUE_ENTITY } FAXPP_EntityParseState; Modified: trunk/faxpp/tests/xmlconf_runner.c =================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2008-03-13 21:56:47 UTC (rev 42) +++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-14 15:24:54 UTC (rev 43) @@ -29,7 +29,7 @@ if(line != 0) { output_text(FAXPP_get_base_uri(parser), stderr); - fprintf(stderr, ":%03d:%03d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); + fprintf(stderr, ":%d:%d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } else { fprintf(stderr, "FAXPP_Error: %s\n", FAXPP_err_to_string(err)); } @@ -76,8 +76,11 @@ *ptr = 0; } -FAXPP_Error run_test_case(const char *filename, unsigned int *errLine) +FAXPP_Error run_test_case(const char *filename, char *errFileBuffer, unsigned int bufLen, unsigned int *errLine, unsigned int *errColumn) { + const FAXPP_Text *text; + unsigned int len; + FAXPP_Parser *testparser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, 
FAXPP_utf8_transcoder); FILE *file = fopen(filename, "r"); @@ -100,7 +103,13 @@ } if(err != NO_ERROR) { + text = FAXPP_get_base_uri(testparser); + len = text->len < bufLen - 1 ? text->len : bufLen - 1; + memcpy(errFileBuffer, text->ptr, len); + errFileBuffer[len] = 0; + *errLine = FAXPP_get_error_line(testparser); + *errColumn = FAXPP_get_error_column(testparser); } fclose(file); @@ -118,7 +127,9 @@ char base_buffer[1024]; char file_buffer[1024]; FAXPP_Error result; + char errFileBuffer[1024]; unsigned int errLine; + unsigned int errColumn; int output_events = 0; int test_failures = 0; @@ -191,7 +202,7 @@ attr = find_attribute(event, "URI"); calculateBase(base_buffer, &attr->value, file_buffer); - result = run_test_case(file_buffer, &errLine); + result = run_test_case(file_buffer, errFileBuffer, sizeof(errFileBuffer), &errLine, &errColumn); // Skip tests that require no namespaces attr = find_attribute(event, "NAMESPACE"); @@ -251,7 +262,7 @@ } if(result != NO_ERROR) { - fprintf(stderr, "\nError: %s:%i", FAXPP_err_to_string(result), errLine); + fprintf(stderr, "\nError: %s:%d:%d %s\n", errFileBuffer, errLine, errColumn, FAXPP_err_to_string(result)); } fprintf(stderr, "\n"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-13 21:56:43
|
Revision: 42 http://faxpp.svn.sourceforge.net/faxpp/?rev=42&view=rev Author: jpcs Date: 2008-03-13 14:56:47 -0700 (Thu, 13 Mar 2008) Log Message: ----------- Added correct tokenization of element declarations. Modified Paths: -------------- trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/tests/xmlconf_runner.c Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/include/faxpp/error.h 2008-03-13 21:56:47 UTC (rev 42) @@ -67,6 +67,7 @@ DONT_PARSE_EXTERNAL_ENTITY, INVALID_ATTRIBUTE_TYPE, INVALID_DEFAULTDECL, + INVALID_ELEMENTDECL_CONTENT, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/include/faxpp/token.h 2008-03-13 21:56:47 UTC (rev 42) @@ -75,7 +75,17 @@ ELEMENTDECL_PREFIX_TOKEN, ELEMENTDECL_NAME_TOKEN, - ELEMENTDECL_CONTENT_TOKEN, + ELEMENTDECL_EMPTY_TOKEN, + ELEMENTDECL_ANY_TOKEN, + ELEMENTDECL_PCDATA_TOKEN, + ELEMENTDECL_LPAR_TOKEN, + ELEMENTDECL_RPAR_TOKEN, + ELEMENTDECL_QUESTION_TOKEN, + ELEMENTDECL_STAR_TOKEN, + ELEMENTDECL_PLUS_TOKEN, + ELEMENTDECL_BAR_TOKEN, + ELEMENTDECL_COMMA_TOKEN, + ELEMENTDECL_END_TOKEN, ATTLISTDECL_PREFIX_TOKEN, ATTLISTDECL_NAME_TOKEN, Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/elementdecl.c 2008-03-13 21:56:47 UTC (rev 42) @@ -39,7 +39,7 @@ return NO_ERROR; } -#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +#define 
SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, return_token) \ FAXPP_Error \ name(FAXPP_TokenizerEnv *env) \ { \ @@ -49,6 +49,7 @@ case (ch): \ if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ env->state = (next_state); \ + if((return_token) != NO_TOKEN) { report_empty_token((return_token), env); } \ next_char(env); \ break; \ LINE_ENDINGS \ @@ -59,6 +60,8 @@ return NO_ERROR; \ } +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) SINGLE_CHAR_STATE_RETURN(name, ch, next_stored_state, next_state, error, NO_TOKEN) + SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL) SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL) @@ -168,14 +171,466 @@ read_char(env); switch(env->current_char) { + case 'E': + env->state = elementdecl_empty_state1; + break; + case 'A': + env->state = elementdecl_any_state1; + break; + case '(': + env->nesting_level += 1; + env->stored_state = elementdecl_mixed_or_children_state; + env->state = ws_state; + report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; + } + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(elementdecl_empty_state1, 'M', 0, elementdecl_empty_state2, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_empty_state2, 'P', 0, elementdecl_empty_state3, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_empty_state3, 'T', 0, elementdecl_empty_state4, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE_RETURN(elementdecl_empty_state4, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_EMPTY_TOKEN) + +SINGLE_CHAR_STATE(elementdecl_any_state1, 'N', 0, elementdecl_any_state2, INVALID_ELEMENTDECL_CONTENT) 
+SINGLE_CHAR_STATE_RETURN(elementdecl_any_state2, 'Y', elementdecl_end_state, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_ANY_TOKEN) + +FAXPP_Error +elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '#': + env->state = elementdecl_pcdata_state1; + break; + default: + env->state = elementdecl_cp_name_state1; + // No next_char + return NO_ERROR; + } + next_char(env); + return NO_ERROR; + +} + +FAXPP_Error +elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '(': + env->nesting_level += 1; + env->stored_state = elementdecl_cp_name_state1; + env->state = ws_state; + report_empty_token(ELEMENTDECL_LPAR_TOKEN, env); + next_char(env); + break; + LINE_ENDINGS + default: + env->state = elementdecl_cp_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + break; + } + return NO_ERROR; +} + +FAXPP_Error +elementdecl_cp_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = elementdecl_cp_separator_or_end_state; + env->state = ws_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '?': + case '*': + case '+': + env->state = elementdecl_cp_cardinality_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + case '|': + case ',': + case ')': + env->state = elementdecl_cp_separator_or_end_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + case ':': + env->state = elementdecl_cp_name_seen_colon_state1; + token_end_position(env); + report_token(ELEMENTDECL_PREFIX_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + break; + } + + 
next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +elementdecl_cp_name_seen_colon_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = elementdecl_cp_name_seen_colon_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +elementdecl_cp_name_seen_colon_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = elementdecl_cp_separator_or_end_state; + env->state = ws_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '?': + case '*': + case '+': + env->state = elementdecl_cp_cardinality_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + case '|': + case ',': + case ')': + env->state = elementdecl_cp_separator_or_end_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +elementdecl_cp_cardinality_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + if(env->nesting_level == 0) + env->stored_state = elementdecl_end_state; + else + env->stored_state = elementdecl_cp_separator_or_end_state; + env->state = ws_state; + + switch(env->current_char) { + case '?': + report_empty_token(ELEMENTDECL_QUESTION_TOKEN, env); + break; + case '*': + report_empty_token(ELEMENTDECL_STAR_TOKEN, env); + break; + case '+': + 
report_empty_token(ELEMENTDECL_PLUS_TOKEN, env); + break; + default: + // No next_char + return NO_ERROR; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '|': + env->stored_state = elementdecl_cp_name_state1; + env->state = ws_state; + report_empty_token(ELEMENTDECL_BAR_TOKEN, env); + break; + case ',': + env->stored_state = elementdecl_cp_name_state1; + env->state = ws_state; + report_empty_token(ELEMENTDECL_COMMA_TOKEN, env); + break; + case ')': + env->nesting_level -= 1; + env->state = elementdecl_cp_cardinality_state; + report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); + break; + default: + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(elementdecl_pcdata_state1, 'P', 0, elementdecl_pcdata_state2, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_pcdata_state2, 'C', 0, elementdecl_pcdata_state3, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_pcdata_state3, 'D', 0, elementdecl_pcdata_state4, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_pcdata_state4, 'A', 0, elementdecl_pcdata_state5, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE(elementdecl_pcdata_state5, 'T', 0, elementdecl_pcdata_state6, INVALID_ELEMENTDECL_CONTENT) +SINGLE_CHAR_STATE_RETURN(elementdecl_pcdata_state6, 'A', elementdecl_pcdata_end_or_names_state1, ws_state, INVALID_ELEMENTDECL_CONTENT, ELEMENTDECL_PCDATA_TOKEN) + +FAXPP_Error +elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ')': + env->nesting_level -= 1; + env->state = elementdecl_pcdata_optional_star_state; + report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); + break; + case '|': + env->stored_state = elementdecl_pcdata_name_state1; + env->state = ws_state; + report_empty_token(ELEMENTDECL_BAR_TOKEN, env); + break; + 
default: + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_optional_star_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '*': + report_empty_token(ELEMENTDECL_STAR_TOKEN, env); + next_char(env); + // Fall through + default: + env->stored_state = elementdecl_end_state; + env->state = ws_state; + // No next_char + break; + } + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case ')': + env->nesting_level -= 1; + env->state = elementdecl_pcdata_star_state; + report_empty_token(ELEMENTDECL_RPAR_TOKEN, env); + break; + case '|': + env->stored_state = elementdecl_pcdata_name_state1; + env->state = ws_state; + report_empty_token(ELEMENTDECL_BAR_TOKEN, env); + break; + default: + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_star_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + env->stored_state = elementdecl_end_state; + env->state = ws_state; + + switch(env->current_char) { + case '*': + report_empty_token(ELEMENTDECL_STAR_TOKEN, env); + next_char(env); + break; + default: + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; + } + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = elementdecl_pcdata_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = elementdecl_pcdata_end_or_names_state2; + env->state = 
ws_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case ')': + env->state = elementdecl_pcdata_end_or_names_state2; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + case ':': + env->state = elementdecl_pcdata_name_seen_colon_state1; + token_end_position(env); + report_token(ELEMENTDECL_PREFIX_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_name_seen_colon_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = elementdecl_pcdata_name_seen_colon_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +elementdecl_pcdata_name_seen_colon_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = elementdecl_pcdata_end_or_names_state2; + env->state = ws_state; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case ')': + env->state = elementdecl_pcdata_end_or_names_state2; + token_end_position(env); + report_token(ELEMENTDECL_NAME_TOKEN, env); + // No next_char + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ELEMENTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +elementdecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { case '>': base_state(env); - 
token_end_position(env); - report_token(ELEMENTDECL_CONTENT_TOKEN, env); + report_empty_token(ELEMENTDECL_END_TOKEN, env); break; LINE_ENDINGS default: - break; + next_char(env); + return INVALID_ELEMENTDECL_CONTENT; } next_char(env); return NO_ERROR; Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/error.c 2008-03-13 21:56:47 UTC (rev 42) @@ -129,6 +129,8 @@ return "INVALID_ATTRIBUTE_TYPE"; case INVALID_DEFAULTDECL: return "INVALID_DEFAULTDECL"; + case INVALID_ELEMENTDECL_CONTENT: + return "INVALID_ELEMENTDECL_CONTENT"; case NO_ERROR: break; } Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/token.c 2008-03-13 21:56:47 UTC (rev 42) @@ -103,8 +103,28 @@ return "ELEMENTDECL_PREFIX_TOKEN"; case ELEMENTDECL_NAME_TOKEN: return "ELEMENTDECL_NAME_TOKEN"; - case ELEMENTDECL_CONTENT_TOKEN: - return "ELEMENTDECL_CONTENT_TOKEN"; + case ELEMENTDECL_EMPTY_TOKEN: + return "ELEMENTDECL_EMPTY_TOKEN"; + case ELEMENTDECL_ANY_TOKEN: + return "ELEMENTDECL_ANY_TOKEN"; + case ELEMENTDECL_PCDATA_TOKEN: + return "ELEMENTDECL_PCDATA_TOKEN"; + case ELEMENTDECL_LPAR_TOKEN: + return "ELEMENTDECL_LPAR_TOKEN"; + case ELEMENTDECL_RPAR_TOKEN: + return "ELEMENTDECL_RPAR_TOKEN"; + case ELEMENTDECL_QUESTION_TOKEN: + return "ELEMENTDECL_QUESTION_TOKEN"; + case ELEMENTDECL_STAR_TOKEN: + return "ELEMENTDECL_STAR_TOKEN"; + case ELEMENTDECL_PLUS_TOKEN: + return "ELEMENTDECL_PLUS_TOKEN"; + case ELEMENTDECL_BAR_TOKEN: + return "ELEMENTDECL_BAR_TOKEN"; + case ELEMENTDECL_COMMA_TOKEN: + return "ELEMENTDECL_COMMA_TOKEN"; + case ELEMENTDECL_END_TOKEN: + return "ELEMENTDECL_END_TOKEN"; case ATTLISTDECL_PREFIX_TOKEN: return "ATTLISTDECL_PREFIX_TOKEN"; Modified: trunk/faxpp/src/tokenizer_states.c 
=================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-13 21:56:47 UTC (rev 42) @@ -504,6 +504,62 @@ return "elementdecl_name_seen_colon_state2"; else if(state == elementdecl_content_state) return "elementdecl_content_state"; + else if(state == elementdecl_empty_state1) + return "elementdecl_empty_state1"; + else if(state == elementdecl_empty_state2) + return "elementdecl_empty_state2"; + else if(state == elementdecl_empty_state3) + return "elementdecl_empty_state3"; + else if(state == elementdecl_empty_state4) + return "elementdecl_empty_state4"; + else if(state == elementdecl_any_state1) + return "elementdecl_any_state1"; + else if(state == elementdecl_any_state2) + return "elementdecl_any_state2"; + else if(state == elementdecl_mixed_or_children_state) + return "elementdecl_mixed_or_children_state"; + else if(state == elementdecl_cp_name_state1) + return "elementdecl_cp_name_state1"; + else if(state == elementdecl_cp_name_state2) + return "elementdecl_cp_name_state2"; + else if(state == elementdecl_cp_name_seen_colon_state1) + return "elementdecl_cp_name_seen_colon_state1"; + else if(state == elementdecl_cp_name_seen_colon_state2) + return "elementdecl_cp_name_seen_colon_state2"; + else if(state == elementdecl_cp_cardinality_state) + return "elementdecl_cp_cardinality_state"; + else if(state == elementdecl_cp_separator_or_end_state) + return "elementdecl_cp_separator_or_end_state"; + else if(state == elementdecl_pcdata_state1) + return "elementdecl_pcdata_state1"; + else if(state == elementdecl_pcdata_state2) + return "elementdecl_pcdata_state2"; + else if(state == elementdecl_pcdata_state3) + return "elementdecl_pcdata_state3"; + else if(state == elementdecl_pcdata_state4) + return "elementdecl_pcdata_state4"; + else if(state == elementdecl_pcdata_state5) + return "elementdecl_pcdata_state5"; + else if(state == 
elementdecl_pcdata_state6) + return "elementdecl_pcdata_state6"; + else if(state == elementdecl_pcdata_end_or_names_state1) + return "elementdecl_pcdata_end_or_names_state1"; + else if(state == elementdecl_pcdata_optional_star_state) + return "elementdecl_pcdata_optional_star_state"; + else if(state == elementdecl_pcdata_end_or_names_state2) + return "elementdecl_pcdata_end_or_names_state2"; + else if(state == elementdecl_pcdata_star_state) + return "elementdecl_pcdata_star_state"; + else if(state == elementdecl_pcdata_name_state1) + return "elementdecl_pcdata_name_state1"; + else if(state == elementdecl_pcdata_name_state2) + return "elementdecl_pcdata_name_state2"; + else if(state == elementdecl_pcdata_name_seen_colon_state1) + return "elementdecl_pcdata_name_seen_colon_state1"; + else if(state == elementdecl_pcdata_name_seen_colon_state2) + return "elementdecl_pcdata_name_seen_colon_state2"; + else if(state == elementdecl_end_state) + return "elementdecl_end_state"; else if(state == attlistdecl_initial_state1) return "attlistdecl_initial_state1"; Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-13 21:56:47 UTC (rev 42) @@ -302,6 +302,34 @@ FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_empty_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_empty_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_empty_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_empty_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_any_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_any_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error 
elementdecl_mixed_or_children_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_name_seen_colon_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_cardinality_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_cp_separator_or_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_end_or_names_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_optional_star_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_end_or_names_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_star_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_name_seen_colon_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_pcdata_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_end_state(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_initial_state2(FAXPP_TokenizerEnv *env); @@ -426,7 +454,7 @@ *********************/ #ifdef DEBUG -const char *state_to_string(FAXPP_StateFunction state); +const char *FAXPP_state_to_string(FAXPP_StateFunction state); #endif #define read_char_no_check(env) \ @@ -441,7 +469,7 @@ } \ \ /* printf("%03d:%03d State: %s, Byte: %c, Char: %08X\n", (env)->line, 
(env)->column, */ \ -/* state_to_string((env)->state), *(unsigned char*)(env)->position, */ \ +/* FAXPP_state_to_string((env)->state), *(unsigned char*)(env)->position, */ \ /* (env)->current_char); */ \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/src/xml_parser.c 2008-03-13 21:56:47 UTC (rev 42) @@ -1562,7 +1562,18 @@ break; case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: - case ELEMENTDECL_CONTENT_TOKEN: + case ELEMENTDECL_EMPTY_TOKEN: + case ELEMENTDECL_ANY_TOKEN: + case ELEMENTDECL_PCDATA_TOKEN: + case ELEMENTDECL_LPAR_TOKEN: + case ELEMENTDECL_RPAR_TOKEN: + case ELEMENTDECL_QUESTION_TOKEN: + case ELEMENTDECL_STAR_TOKEN: + case ELEMENTDECL_PLUS_TOKEN: + case ELEMENTDECL_BAR_TOKEN: + case ELEMENTDECL_COMMA_TOKEN: + case ELEMENTDECL_END_TOKEN: + case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: case ATTLISTDECL_ATTDEF_NAME_TOKEN: case ATTLISTDECL_NOTATION_NAME_TOKEN: @@ -1615,7 +1626,17 @@ case PE_REFERENCE_TOKEN: case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: - case ELEMENTDECL_CONTENT_TOKEN: + case ELEMENTDECL_EMPTY_TOKEN: + case ELEMENTDECL_ANY_TOKEN: + case ELEMENTDECL_PCDATA_TOKEN: + case ELEMENTDECL_LPAR_TOKEN: + case ELEMENTDECL_RPAR_TOKEN: + case ELEMENTDECL_QUESTION_TOKEN: + case ELEMENTDECL_STAR_TOKEN: + case ELEMENTDECL_PLUS_TOKEN: + case ELEMENTDECL_BAR_TOKEN: + case ELEMENTDECL_COMMA_TOKEN: + case ELEMENTDECL_END_TOKEN: case ATTLISTDECL_PREFIX_TOKEN: case ATTLISTDECL_NAME_TOKEN: case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: Modified: trunk/faxpp/tests/xmlconf_runner.c =================================================================== --- trunk/faxpp/tests/xmlconf_runner.c 2008-03-11 17:28:12 UTC (rev 41) +++ trunk/faxpp/tests/xmlconf_runner.c 2008-03-13 21:56:47 UTC (rev 42) @@ -23,10 +23,13 @@ #include "../examples/entity_resolver.h" #include "../examples/output_event.h" -void error(FAXPP_Error err, 
unsigned int line, unsigned int column) +void error(const FAXPP_Parser *parser, FAXPP_Error err) { + unsigned int line = FAXPP_get_error_line(parser); + if(line != 0) { - fprintf(stderr, "%03d:%03d FAXPP_Error: %s\n", line, column, FAXPP_err_to_string(err)); + output_text(FAXPP_get_base_uri(parser), stderr); + fprintf(stderr, ":%03d:%03d FAXPP_Error: %s\n", line, FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } else { fprintf(stderr, "FAXPP_Error: %s\n", FAXPP_err_to_string(err)); } @@ -140,10 +143,10 @@ FAXPP_set_external_entity_callback(parser, entity_callback, 0); err = FAXPP_init_parse_file(parser, file); - if(err != NO_ERROR) error(err, 0, 0); + if(err != NO_ERROR) error(parser, err); err = FAXPP_set_base_uri_str(parser, testFile); - if(err != NO_ERROR) error(err, 0, 0); + if(err != NO_ERROR) error(parser, err); while((err = FAXPP_next_event(parser)) == 0) { event = FAXPP_get_current_event(parser); @@ -275,8 +278,7 @@ } } - if(err != NO_ERROR) error(err, FAXPP_get_error_line(parser), - FAXPP_get_error_column(parser)); + if(err != NO_ERROR) error(parser, err); cleanup: printf("\n\nTests run: %d, Tests passed: %d, Tests skipped: %d, Tests failed: %d (%.3f%%)\n", test_passes + test_failures + test_skips, This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-03-11 17:30:01
|
Revision: 41 http://faxpp.svn.sourceforge.net/faxpp/?rev=41&view=rev Author: jpcs Date: 2008-03-11 10:28:12 -0700 (Tue, 11 Mar 2008) Log Message: ----------- Correctly tokenize notation declarations and attlist declarations. Moved a test out of the inner loop in *element_content_state. Modified Paths: -------------- trunk/faxpp/TODO trunk/faxpp/examples/entity_resolver.c trunk/faxpp/examples/output_event.h trunk/faxpp/examples/parser_example.c trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/attr_states.h trunk/faxpp/src/doctype.c trunk/faxpp/src/element_states.h trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Modified: trunk/faxpp/TODO =================================================================== --- trunk/faxpp/TODO 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/TODO 2008-03-11 17:28:12 UTC (rev 41) @@ -8,11 +8,10 @@ xml:space value checking Error for redefining "xml" namespace Error for defining "xmlns" namespace +Parse element decls correctly +Parse parameter entities in markup correctly Large tasks ----------- -Entity expansion framework -DTD internal subset parsing -DTD external subset parsing DTD validation Modified: trunk/faxpp/examples/entity_resolver.c =================================================================== --- trunk/faxpp/examples/entity_resolver.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/entity_resolver.c 2008-03-11 17:28:12 UTC (rev 41) @@ -60,7 +60,7 @@ file = fopen(path, "r"); if(file == 0) { - printf("Open of '%s' failed: %s\n", path, strerror(errno)); +/* printf("Open of '%s' failed: %s\n", path, strerror(errno)); */ return CANT_LOCATE_EXTERNAL_ENTITY; } Modified: trunk/faxpp/examples/output_event.h =================================================================== --- 
trunk/faxpp/examples/output_event.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/output_event.h 2008-03-11 17:28:12 UTC (rev 41) @@ -23,5 +23,6 @@ void output_event(const FAXPP_Event *event, FILE *stream); void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream); +void output_text(const FAXPP_Text *text, FILE *stream); #endif Modified: trunk/faxpp/examples/parser_example.c =================================================================== --- trunk/faxpp/examples/parser_example.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/examples/parser_example.c 2008-03-11 17:28:12 UTC (rev 41) @@ -76,14 +76,15 @@ } while((err = FAXPP_next_event(parser)) == 0) { - output_event(FAXPP_get_current_event(parser), stdout); +/* output_event(FAXPP_get_current_event(parser), stdout); */ if(FAXPP_get_current_event(parser)->type == END_DOCUMENT_EVENT) break; } if(err != NO_ERROR) { - printf("%03d:%03d ERROR: %s\n", FAXPP_get_error_line(parser), + output_text(FAXPP_get_base_uri(parser), stdout); + printf(":%d:%d ERROR: %s\n", FAXPP_get_error_line(parser), FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/include/faxpp/error.h 2008-03-11 17:28:12 UTC (rev 41) @@ -65,6 +65,8 @@ ILLEGAL_PARAMETER_ENTITY, CANT_LOCATE_EXTERNAL_ENTITY, DONT_PARSE_EXTERNAL_ENTITY, + INVALID_ATTRIBUTE_TYPE, + INVALID_DEFAULTDECL, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/include/faxpp/token.h 2008-03-11 17:28:12 UTC (rev 41) @@ -79,10 +79,14 @@ ATTLISTDECL_PREFIX_TOKEN, ATTLISTDECL_NAME_TOKEN, - ATTLISTDECL_CONTENT_TOKEN, + ATTLISTDECL_ATTDEF_PREFIX_TOKEN, + ATTLISTDECL_ATTDEF_NAME_TOKEN, 
+ ATTLISTDECL_NOTATION_NAME_TOKEN, + ATTLISTDECL_ENUMERATION_NAME_TOKEN, + ATTLISTDECL_END_TOKEN, NOTATIONDECL_NAME_TOKEN, - NOTATIONDECL_CONTENT_TOKEN, + NOTATIONDECL_END_TOKEN, ENTITYDECL_NAME_TOKEN, ENTITYDECL_VALUE_TOKEN, Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/attlistdecl.c 2008-03-11 17:28:12 UTC (rev 41) @@ -70,18 +70,16 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_content_state; - env->state = ws_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); next_char(env); return NO_ERROR; case '>': - env->state = attlistdecl_content_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); // no next char - token_start_position(env); return NO_ERROR; case ':': env->state = attlistdecl_name_seen_colon_state1; @@ -129,18 +127,16 @@ switch(env->current_char) { WHITESPACE: - env->stored_state = attlistdecl_content_state; - env->state = ws_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); next_char(env); return NO_ERROR; case '>': - env->state = attlistdecl_content_state; + env->state = attlistdecl_attdef_name_state1; token_end_position(env); report_token(ATTLISTDECL_NAME_TOKEN, env); // no next char - token_start_position(env); return NO_ERROR; default: break; @@ -156,21 +152,704 @@ } FAXPP_Error -attlistdecl_content_state(FAXPP_TokenizerEnv *env) +attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env) { read_char(env); switch(env->current_char) { + WHITESPACE: + break; case '>': base_state(env); + report_empty_token(ATTLISTDECL_END_TOKEN, env); + break; + default: + env->state = attlistdecl_attdef_name_state2; + token_start_position(env); + next_char(env); + 
if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + return NO_ERROR; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case ':': + env->state = attlistdecl_attdef_name_seen_colon_state1; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_PREFIX_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = attlistdecl_attdef_name_seen_colon_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ATTDEF_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ATTLISTDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +/* [54] AttType ::= StringType | TokenizedType | EnumeratedType */ +/* [55] StringType ::= 'CDATA' */ +/* 
[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] */ +/* | 'IDREF' [VC: IDREF] */ +/* | 'IDREFS' [VC: IDREF] */ +/* | 'ENTITY' [VC: Entity Name] */ +/* | 'ENTITIES' [VC: Entity Name] */ +/* | 'NMTOKEN' [VC: Name Token] */ +/* | 'NMTOKENS' [VC: Name Token] */ + +/* [57] EnumeratedType ::= NotationType | Enumeration */ +/* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' [VC: Notation Attributes] */ +/* [VC: One Notation Per Element Type] */ +/* [VC: No Notation on Empty Element] */ +/* [VC: No Duplicate Tokens] */ +/* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] */ +/* [VC: No Duplicate Tokens] */ +FAXPP_Error +attlistdecl_atttype_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'C': + env->state = attlistdecl_atttype_cdata_state1; + break; + case 'I': + env->state = attlistdecl_atttype_id_state1; + break; + case 'E': + env->state = attlistdecl_atttype_entity_state1; + break; + case 'N': + env->state = attlistdecl_atttype_nmtoken_state1; + break; + case '(': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state1, 'N', 0, attlistdecl_atttype_entity_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state2, 'T', 0, attlistdecl_atttype_entity_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state3, 'I', 0, attlistdecl_atttype_entity_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entity_state4, 'T', 0, attlistdecl_atttype_entity_state5, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_entity_state5(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'Y': + // TBD Tokens for these - 
jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'I': + env->state = attlistdecl_atttype_entities_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state1, 'E', 0, attlistdecl_atttype_entities_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_entities_state2, 'S', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'O': + env->state = attlistdecl_atttype_notation_state1; + break; + case 'M': + env->state = attlistdecl_atttype_nmtoken_state2; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state2, 'T', 0, attlistdecl_atttype_nmtoken_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state3, 'O', 0, attlistdecl_atttype_nmtoken_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state4, 'K', 0, attlistdecl_atttype_nmtoken_state5, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state5, 'E', 0, attlistdecl_atttype_nmtoken_state6, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_nmtoken_state6, 'N', 0, attlistdecl_atttype_nmtoken_state7, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_nmtoken_state7(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'S': + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + 
next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state1, 'T', 0, attlistdecl_atttype_notation_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state2, 'A', 0, attlistdecl_atttype_notation_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state3, 'T', 0, attlistdecl_atttype_notation_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state4, 'I', 0, attlistdecl_atttype_notation_state5, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state5, 'O', 0, attlistdecl_atttype_notation_state6, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_notation_state6, 'N', attlistdecl_atttype_notation_state7, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '(': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = attlistdecl_atttype_notation_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_notation_separator_state; + env->state = ws_state; token_end_position(env); - report_token(ATTLISTDECL_CONTENT_TOKEN, env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); break; + 
case '|': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + token_end_position(env); + report_token(ATTLISTDECL_NOTATION_NAME_TOKEN, env); + break; + default: + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + return NO_ERROR; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '|': + env->stored_state = attlistdecl_atttype_notation_name_state1; + env->state = ws_state; + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_id_state1, 'D', 0, attlistdecl_atttype_id_state2, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_id_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'R': + env->state = attlistdecl_atttype_idref_state1; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state1, 'E', 0, attlistdecl_atttype_idref_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_idref_state2, 'F', 0, attlistdecl_atttype_idref_state3, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_idref_state3(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + // TBD Tokens for these - jpcs + 
env->stored_state = attlistdecl_default_state1; + env->state = ws_state; + break; + case 'S': + // TBD Tokens for these - jpcs + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state1, 'D', 0, attlistdecl_atttype_cdata_state2, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state2, 'A', 0, attlistdecl_atttype_cdata_state3, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state3, 'T', 0, attlistdecl_atttype_cdata_state4, INVALID_ATTRIBUTE_TYPE) +SINGLE_CHAR_STATE(attlistdecl_atttype_cdata_state4, 'A', attlistdecl_default_state1, ws_plus_state, INVALID_ATTRIBUTE_TYPE) + +FAXPP_Error +attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { LINE_ENDINGS default: + env->state = attlistdecl_atttype_enumeration_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; break; } + + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = attlistdecl_atttype_enumeration_separator_state; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + case '|': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + token_end_position(env); + report_token(ATTLISTDECL_ENUMERATION_NAME_TOKEN, env); + break; + default: + next_char(env); + 
if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ATTRIBUTE_TYPE; + return NO_ERROR; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '|': + env->stored_state = attlistdecl_atttype_enumeration_name_state1; + env->state = ws_state; + break; + case ')': + env->stored_state = attlistdecl_default_state1; + env->state = ws_plus_state; + break; + default: + next_char(env); + return INVALID_ATTRIBUTE_TYPE; + } + + next_char(env); + return NO_ERROR; +} + +/* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' */ +/* | (('#FIXED' S)? AttValue) [VC: Required Attribute] */ +/* [VC: Attribute Default Value Syntactically Correct] */ +/* [WFC: No < in Attribute Values] */ +/* [VC: Fixed Attribute Default] */ +FAXPP_Error +attlistdecl_default_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '#': + env->state = attlistdecl_default_state2; + next_char(env); + return NO_ERROR; + case '\'': + env->state = attlistdecl_attvalue_apos_state; + break; + case '"': + env->state = attlistdecl_attvalue_quot_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_default_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'R': + env->state = attlistdecl_default_required_state1; + break; + case 'I': + env->state = attlistdecl_default_implied_state1; + break; + case 'F': + env->state = attlistdecl_default_fixed_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + return NO_ERROR; +} + +// TBD Tokens for these - jpcs + +SINGLE_CHAR_STATE(attlistdecl_default_implied_state1, 'M', 0, attlistdecl_default_implied_state2, INVALID_DEFAULTDECL) 
+SINGLE_CHAR_STATE(attlistdecl_default_implied_state2, 'P', 0, attlistdecl_default_implied_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state3, 'L', 0, attlistdecl_default_implied_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state4, 'I', 0, attlistdecl_default_implied_state5, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state5, 'E', 0, attlistdecl_default_implied_state6, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_implied_state6, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) + +SINGLE_CHAR_STATE(attlistdecl_default_required_state1, 'E', 0, attlistdecl_default_required_state2, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state2, 'Q', 0, attlistdecl_default_required_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state3, 'U', 0, attlistdecl_default_required_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state4, 'I', 0, attlistdecl_default_required_state5, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state5, 'R', 0, attlistdecl_default_required_state6, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state6, 'E', 0, attlistdecl_default_required_state7, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_required_state7, 'D', 0, attlistdecl_attdef_name_state1, INVALID_DEFAULTDECL) + +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state1, 'I', 0, attlistdecl_default_fixed_state2, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state2, 'X', 0, attlistdecl_default_fixed_state3, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state3, 'E', 0, attlistdecl_default_fixed_state4, INVALID_DEFAULTDECL) +SINGLE_CHAR_STATE(attlistdecl_default_fixed_state4, 'D', attlistdecl_attvalue_start_state, ws_plus_state, INVALID_DEFAULTDECL) + +FAXPP_Error +attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env) 
+{ + read_char(env); + + switch(env->current_char) { + case '\'': + env->state = attlistdecl_attvalue_apos_state; + break; + case '"': + env->state = attlistdecl_attvalue_quot_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DEFAULTDECL; + } + + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +attlistdecl_attvalue_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ATTRIBUTE_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = attlistdecl_attdef_name_state1; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + next_char(env); + return INVALID_CHAR_IN_ATTRIBUTE; + LINE_ENDINGS + case '\t': + if(env->normalize_attrs) { + // Move the token to the buffer, to normalize it + FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0); + if(err != NO_ERROR) return err; + env->current_char = ' '; + } + break; + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens return NO_ERROR; } +FAXPP_Error +attlistdecl_attvalue_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ATTRIBUTE_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return 
PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = attlistdecl_attdef_name_state1; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + next_char(env); + return INVALID_CHAR_IN_ATTRIBUTE; + LINE_ENDINGS + case '\t': { + if(env->normalize_attrs) { + // Move the token to the buffer, to normalize it + FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0); + if(err != NO_ERROR) return err; + env->current_char = ' '; + } + break; + } + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + + Modified: trunk/faxpp/src/attr_states.h =================================================================== --- trunk/faxpp/src/attr_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/attr_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -248,7 +248,7 @@ PREFIX(attr_value_apos_state)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -309,7 +309,7 @@ PREFIX(attr_value_quot_state)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -373,7 +373,7 @@ PREFIX(attr_value_state_en)(FAXPP_TokenizerEnv *env) { while(1) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { Modified: trunk/faxpp/src/doctype.c 
=================================================================== --- trunk/faxpp/src/doctype.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/doctype.c 2008-03-11 17:28:12 UTC (rev 41) @@ -381,7 +381,7 @@ switch(env->current_char) { case '\'': - env->state = system_id_ws_state; + env->state = public_id_ws_state2; token_end_position(env); report_token(PUBID_LITERAL_TOKEN, env); next_char(env); @@ -425,7 +425,7 @@ switch(env->current_char) { case '"': - env->state = system_id_ws_state; + env->state = public_id_ws_state2; token_end_position(env); report_token(PUBID_LITERAL_TOKEN, env); next_char(env); @@ -462,6 +462,53 @@ } FAXPP_Error +public_id_ws_state2(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = public_id_ws_state3; + next_char(env); + break; + case '>': + if(env->stored_state == notationdecl_end_state) { + // Notation decls can skip the system literal + retrieve_state(env); + return NO_ERROR; + } + // Fall through + default: + env->state = system_literal_start_state; + return EXPECTING_WHITESPACE; + } + return NO_ERROR; +} + +FAXPP_Error +public_id_ws_state3(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + if(env->stored_state == notationdecl_end_state) { + // Notation decls can skip the system literal + retrieve_state(env); + return NO_ERROR; + } + // Fall through + default: + env->state = system_literal_start_state; + break; + } + return NO_ERROR; +} + +FAXPP_Error doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env) { read_char(env); Modified: trunk/faxpp/src/element_states.h =================================================================== --- trunk/faxpp/src/element_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/element_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -190,84 +190,158 @@ FAXPP_Error PREFIX(element_content_state)(FAXPP_TokenizerEnv *env) { - while(1) { - 
if(env->position >= env->buffer_end) { - if(env->token.value.ptr) { - token_end_position(env); - if(env->token.value.len != 0) { - report_token(CHARACTERS_TOKEN, env); - return NO_ERROR; + if((env)->token_buffer.cursor) { + + while(1) { + END_CHECK_IF { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(CHARACTERS_TOKEN, env); + return NO_ERROR; + } } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; } - token_start_position(env); - return PREMATURE_END_OF_BUFFER; + + READ_CHAR; + + switch(env->current_char) { + case '<': + env->state = PREFIX(element_content_markup_state); + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + goto next_char_no_error; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case ']': + env->state = PREFIX(element_content_rsquare_state1); + goto next_char_no_error; + LINE_ENDINGS_LABEL(0) + break; + + // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above + + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: + case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F: + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: + goto restricted_char_error; + + case 0x09: + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: + case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: + case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: + case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 
0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: + // The char is a valid one byte char + break; + + default: + DEFAULT_CASE; + + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + goto restricted_char_error; + break; + } + + next_char_append(env); + next_char_position(env); } - READ_CHAR; + } else { - switch(env->current_char) { - case '<': - env->state = PREFIX(element_content_markup_state); - token_end_position(env); - report_token(CHARACTERS_TOKEN, env); - next_char_no_error: - next_char(env); - return NO_ERROR; - case '&': - store_state(env); - env->state = reference_state; - token_end_position(env); - report_token(CHARACTERS_TOKEN, env); - next_char(env); - token_start_position(env); - return NO_ERROR; - case ']': - env->state = PREFIX(element_content_rsquare_state1); - goto next_char_no_error; - LINE_ENDINGS - break; + while(1) { + END_CHECK_IF { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(CHARACTERS_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } - // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above + READ_CHAR; - case 0x7F: - if(env->non_restricted_char == NON_RESTRICTED_CHAR11) { - case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: - case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F: - case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: - case 0x18: case 0x19: case 0x1A: case 
0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: - restricted_char_error: + switch(env->current_char) { + case '<': + env->state = PREFIX(element_content_markup_state); + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); + goto next_char_no_error; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(CHARACTERS_TOKEN, env); next_char(env); - return RESTRICTED_CHAR; - } - break; - case 0x09: - case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: - case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: - case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: - case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: - case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: - case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: - case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: - case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: - case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: - case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: - case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: - case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: - // The char is a valid one byte char - break; + token_start_position(env); + return NO_ERROR; + case ']': + env->state = PREFIX(element_content_rsquare_state1); + goto next_char_no_error; + LINE_ENDINGS_LABEL(1) + break; - default: - DEFAULT_CASE; + // 0x0A, 0x0D, 0x26, 0x3C, 0x5D - Done above - if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: + case 0x08: case 0x0B: case 0x0C: case 
0x0E: case 0x0F: + case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: + case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: goto restricted_char_error; - break; + + case 0x09: + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: + case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: + case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3D: case 0x3E: case 0x3F: + case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: + case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: + case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5E: case 0x5F: + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: + // The char is a valid one byte char + break; + + default: + DEFAULT_CASE; + + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) + goto restricted_char_error; + break; + } + + next_char_position(env); } - next_char(env); } - // Never happens +restricted_char_error: + next_char(env); + return RESTRICTED_CHAR; +next_char_no_error: + next_char(env); return NO_ERROR; } @@ -311,7 +385,7 @@ FAXPP_Error PREFIX(element_content_rsquare_state1)(FAXPP_TokenizerEnv *env) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { @@ -341,7 +415,7 @@ FAXPP_Error 
PREFIX(element_content_rsquare_state2)(FAXPP_TokenizerEnv *env) { - if(env->position >= env->buffer_end) { + END_CHECK_IF { if(env->token.value.ptr) { token_end_position(env); if(env->token.value.len != 0) { Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/error.c 2008-03-11 17:28:12 UTC (rev 41) @@ -125,6 +125,10 @@ return "CANT_LOCATE_EXTERNAL_ENTITY"; case DONT_PARSE_EXTERNAL_ENTITY: return "DONT_PARSE_EXTERNAL_ENTITY"; + case INVALID_ATTRIBUTE_TYPE: + return "INVALID_ATTRIBUTE_TYPE"; + case INVALID_DEFAULTDECL: + return "INVALID_DEFAULTDECL"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/notationdecl.c 2008-03-11 17:28:12 UTC (rev 41) @@ -96,16 +96,42 @@ read_char(env); switch(env->current_char) { - case '>': - base_state(env); - token_end_position(env); - report_token(NOTATIONDECL_CONTENT_TOKEN, env); + case 'S': + env->stored_state = notationdecl_end_state; + env->state = system_id_initial_state1; break; + case 'P': + env->stored_state = notationdecl_end_state; + env->state = public_id_initial_state1; + break; LINE_ENDINGS default: - break; + next_char(env); + return INVALID_DTD_DECL; } next_char(env); return NO_ERROR; } +FAXPP_Error +notationdecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(NOTATIONDECL_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_DTD_DECL; + } + return NO_ERROR; +} + Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-03-06 02:04:04 UTC (rev 40) +++ 
trunk/faxpp/src/token.c 2008-03-11 17:28:12 UTC (rev 41) @@ -110,13 +110,21 @@ return "ATTLISTDECL_PREFIX_TOKEN"; case ATTLISTDECL_NAME_TOKEN: return "ATTLISTDECL_NAME_TOKEN"; - case ATTLISTDECL_CONTENT_TOKEN: - return "ATTLISTDECL_CONTENT_TOKEN"; + case ATTLISTDECL_ATTDEF_PREFIX_TOKEN: + return "ATTLISTDECL_ATTDEF_PREFIX_TOKEN"; + case ATTLISTDECL_ATTDEF_NAME_TOKEN: + return "ATTLISTDECL_ATTDEF_NAME_TOKEN"; + case ATTLISTDECL_NOTATION_NAME_TOKEN: + return "ATTLISTDECL_NOTATION_NAME_TOKEN"; + case ATTLISTDECL_ENUMERATION_NAME_TOKEN: + return "ATTLISTDECL_ENUMERATION_NAME_TOKEN"; + case ATTLISTDECL_END_TOKEN: + return "ATTLISTDECL_END_TOKEN"; case NOTATIONDECL_NAME_TOKEN: return "NOTATIONDECL_NAME_TOKEN"; - case NOTATIONDECL_CONTENT_TOKEN: - return "NOTATIONDECL_CONTENT_TOKEN"; + case NOTATIONDECL_END_TOKEN: + return "NOTATIONDECL_END_TOKEN"; case ENTITYDECL_NAME_TOKEN: return "ENTITYDECL_NAME_TOKEN"; Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-11 17:28:12 UTC (rev 41) @@ -477,6 +477,10 @@ return "pubid_literal_apos_state"; else if(state == pubid_literal_quot_state) return "pubid_literal_quot_state"; + else if(state == public_id_ws_state2) + return "public_id_ws_state2"; + else if(state == public_id_ws_state3) + return "public_id_ws_state3"; else if(state == elementdecl_or_entitydecl_state) return "elementdecl_or_entitydecl_state"; @@ -523,9 +527,134 @@ return "attlistdecl_name_seen_colon_state1"; else if(state == attlistdecl_name_seen_colon_state2) return "attlistdecl_name_seen_colon_state2"; - else if(state == attlistdecl_content_state) - return "attlistdecl_content_state"; + else if(state == attlistdecl_attdef_name_state1) + return "attlistdecl_attdef_name_state1"; + else if(state == attlistdecl_attdef_name_state2) + return "attlistdecl_attdef_name_state2"; + else if(state == 
attlistdecl_attdef_name_seen_colon_state1) + return "attlistdecl_attdef_name_seen_colon_state1"; + else if(state == attlistdecl_attdef_name_seen_colon_state2) + return "attlistdecl_attdef_name_seen_colon_state2"; + else if(state == attlistdecl_atttype_state) + return "attlistdecl_atttype_state"; + else if(state == attlistdecl_atttype_cdata_state1) + return "attlistdecl_atttype_cdata_state1"; + else if(state == attlistdecl_atttype_cdata_state2) + return "attlistdecl_atttype_cdata_state2"; + else if(state == attlistdecl_atttype_cdata_state3) + return "attlistdecl_atttype_cdata_state3"; + else if(state == attlistdecl_atttype_cdata_state4) + return "attlistdecl_atttype_cdata_state4"; + else if(state == attlistdecl_atttype_id_state1) + return "attlistdecl_atttype_id_state1"; + else if(state == attlistdecl_atttype_id_state2) + return "attlistdecl_atttype_id_state2"; + else if(state == attlistdecl_atttype_idref_state1) + return "attlistdecl_atttype_idref_state1"; + else if(state == attlistdecl_atttype_idref_state2) + return "attlistdecl_atttype_idref_state2"; + else if(state == attlistdecl_atttype_idref_state3) + return "attlistdecl_atttype_idref_state3"; + else if(state == attlistdecl_atttype_entity_state1) + return "attlistdecl_atttype_entity_state1"; + else if(state == attlistdecl_atttype_entity_state2) + return "attlistdecl_atttype_entity_state2"; + else if(state == attlistdecl_atttype_entity_state3) + return "attlistdecl_atttype_entity_state3"; + else if(state == attlistdecl_atttype_entity_state4) + return "attlistdecl_atttype_entity_state4"; + else if(state == attlistdecl_atttype_entity_state5) + return "attlistdecl_atttype_entity_state5"; + else if(state == attlistdecl_atttype_entities_state1) + return "attlistdecl_atttype_entities_state1"; + else if(state == attlistdecl_atttype_entities_state2) + return "attlistdecl_atttype_entities_state2"; + else if(state == attlistdecl_atttype_nmtoken_state1) + return "attlistdecl_atttype_nmtoken_state1"; + else if(state == 
attlistdecl_atttype_nmtoken_state2) + return "attlistdecl_atttype_nmtoken_state2"; + else if(state == attlistdecl_atttype_nmtoken_state3) + return "attlistdecl_atttype_nmtoken_state3"; + else if(state == attlistdecl_atttype_nmtoken_state4) + return "attlistdecl_atttype_nmtoken_state4"; + else if(state == attlistdecl_atttype_nmtoken_state5) + return "attlistdecl_atttype_nmtoken_state5"; + else if(state == attlistdecl_atttype_nmtoken_state6) + return "attlistdecl_atttype_nmtoken_state6"; + else if(state == attlistdecl_atttype_nmtoken_state7) + return "attlistdecl_atttype_nmtoken_state7"; + else if(state == attlistdecl_atttype_notation_state1) + return "attlistdecl_atttype_notation_state1"; + else if(state == attlistdecl_atttype_notation_state2) + return "attlistdecl_atttype_notation_state2"; + else if(state == attlistdecl_atttype_notation_state3) + return "attlistdecl_atttype_notation_state3"; + else if(state == attlistdecl_atttype_notation_state4) + return "attlistdecl_atttype_notation_state4"; + else if(state == attlistdecl_atttype_notation_state5) + return "attlistdecl_atttype_notation_state5"; + else if(state == attlistdecl_atttype_notation_state6) + return "attlistdecl_atttype_notation_state6"; + else if(state == attlistdecl_atttype_notation_state7) + return "attlistdecl_atttype_notation_state7"; + else if(state == attlistdecl_atttype_notation_name_state1) + return "attlistdecl_atttype_notation_name_state1"; + else if(state == attlistdecl_atttype_notation_name_state2) + return "attlistdecl_atttype_notation_name_state2"; + else if(state == attlistdecl_atttype_notation_separator_state) + return "attlistdecl_atttype_notation_separator_state"; + else if(state == attlistdecl_atttype_enumeration_name_state1) + return "attlistdecl_atttype_enumeration_name_state1"; + else if(state == attlistdecl_atttype_enumeration_name_state2) + return "attlistdecl_atttype_enumeration_name_state2"; + else if(state == attlistdecl_atttype_enumeration_separator_state) + return 
"attlistdecl_atttype_enumeration_separator_state"; + else if(state == attlistdecl_default_state1) + return "attlistdecl_default_state1"; + else if(state == attlistdecl_default_state2) + return "attlistdecl_default_state2"; + else if(state == attlistdecl_default_implied_state1) + return "attlistdecl_default_implied_state1"; + else if(state == attlistdecl_default_implied_state2) + return "attlistdecl_default_implied_state2"; + else if(state == attlistdecl_default_implied_state3) + return "attlistdecl_default_implied_state3"; + else if(state == attlistdecl_default_implied_state4) + return "attlistdecl_default_implied_state4"; + else if(state == attlistdecl_default_implied_state5) + return "attlistdecl_default_implied_state5"; + else if(state == attlistdecl_default_implied_state6) + return "attlistdecl_default_implied_state6"; + else if(state == attlistdecl_default_required_state1) + return "attlistdecl_default_required_state1"; + else if(state == attlistdecl_default_required_state2) + return "attlistdecl_default_required_state2"; + else if(state == attlistdecl_default_required_state3) + return "attlistdecl_default_required_state3"; + else if(state == attlistdecl_default_required_state4) + return "attlistdecl_default_required_state4"; + else if(state == attlistdecl_default_required_state5) + return "attlistdecl_default_required_state5"; + else if(state == attlistdecl_default_required_state6) + return "attlistdecl_default_required_state6"; + else if(state == attlistdecl_default_required_state7) + return "attlistdecl_default_required_state7"; + else if(state == attlistdecl_default_fixed_state1) + return "attlistdecl_default_fixed_state1"; + else if(state == attlistdecl_default_fixed_state2) + return "attlistdecl_default_fixed_state2"; + else if(state == attlistdecl_default_fixed_state3) + return "attlistdecl_default_fixed_state3"; + else if(state == attlistdecl_default_fixed_state4) + return "attlistdecl_default_fixed_state4"; + else if(state == 
attlistdecl_attvalue_start_state) + return "attlistdecl_attvalue_start_state"; + else if(state == attlistdecl_attvalue_apos_state) + return "attlistdecl_attvalue_apos_state"; + else if(state == attlistdecl_attvalue_quot_state) + return "attlistdecl_attvalue_quot_state"; + else if(state == notationdecl_initial_state1) return "notationdecl_initial_state1"; else if(state == notationdecl_initial_state1) @@ -548,6 +677,8 @@ return "notationdecl_name_state2"; else if(state == notationdecl_content_state) return "notationdecl_content_state"; + else if(state == notationdecl_end_state) + return "notationdecl_end_state"; else if(state == entitydecl_initial_state1) return "entitydecl_initial_state1"; @@ -878,9 +1009,8 @@ // Include the default states #define PREFIX(name) default_ ## name -#define END_CHECK \ - if((env)->position >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return PREMATURE_END_OF_BUFFER; } #define READ_CHAR read_char_no_check(env) #define DEFAULT_CASE (void)0 @@ -890,14 +1020,14 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX // Include the utf8 states #define PREFIX(name) utf8_ ## name -#define END_CHECK \ - if((env)->position >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return PREMATURE_END_OF_BUFFER; } #define READ_CHAR \ /* Assume it's a one byte character for now */ \ env->current_char = *(uint8_t*)env->position; \ @@ -923,14 +1053,14 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX // Include the utf16 states #define PREFIX(name) utf16_ ## name -#define END_CHECK \ - if((env)->position + 1 >= (env)->buffer_end) \ - return PREMATURE_END_OF_BUFFER +#define END_CHECK_IF if((env)->position + 1 >= (env)->buffer_end) +#define END_CHECK END_CHECK_IF { return 
PREMATURE_END_OF_BUFFER; } #define READ_CHAR \ /* Assume it's not a surrogate pair for now */ \ env->current_char = *(uint16_t*)env->position; \ @@ -956,5 +1086,6 @@ #undef DEFAULT_CASE #undef READ_CHAR #undef END_CHECK +#undef END_CHECK_IF #undef PREFIX Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-11 17:28:12 UTC (rev 41) @@ -288,6 +288,8 @@ FAXPP_Error pubid_literal_start_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error public_id_ws_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error public_id_ws_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); @@ -311,8 +313,72 @@ FAXPP_Error attlistdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); -FAXPP_Error attlistdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_seen_colon_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attdef_name_seen_colon_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_cdata_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_id_state1(FAXPP_TokenizerEnv *env); 
+FAXPP_Error attlistdecl_atttype_id_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_idref_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entity_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entities_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_entities_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_nmtoken_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_notation_name_state2(FAXPP_TokenizerEnv 
*env); +FAXPP_Error attlistdecl_atttype_notation_separator_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_atttype_enumeration_separator_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error attlistdecl_default_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_implied_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_required_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_default_fixed_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error attlistdecl_attvalue_quot_state(FAXPP_TokenizerEnv *env); + FAXPP_Error 
notationdecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_initial_state3(FAXPP_TokenizerEnv *env); @@ -323,6 +389,7 @@ FAXPP_Error notationdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); @@ -385,15 +452,25 @@ read_char_no_check((env)); \ } +#define next_char_append(env) \ +{ \ + FAXPP_Error err = FAXPP_buffer_append_ch(&(env)->token_buffer, (env)->transcoder.encode, (env)->current_char); \ + if(err != 0) return err; \ +} + +#define next_char_position(env) \ +{ \ + (env)->column += 1; \ + (env)->position += (env)->char_len; \ +} + #define next_char(env) \ { \ if((env)->token_buffer.cursor) { \ - FAXPP_Error err = FAXPP_buffer_append_ch(&(env)->token_buffer, (env)->transcoder.encode, (env)->current_char); \ - if(err != 0) return err; \ + next_char_append((env)); \ } \ \ - (env)->column += 1; \ - (env)->position += (env)->char_len; \ + next_char_position((env)); \ } #define token_start_position(env) \ @@ -475,24 +552,26 @@ (env)->stored_state = 0; \ } -#define LINE_ENDINGS \ +#define LINE_ENDINGS_LABEL(label) \ case '\r': { \ Char32 next_char; \ if((env)->position + (env)->char_len >= (env)->buffer_end || \ (env)->decode((env)->position + (env)->char_len, (env)->buffer_end, &next_char) \ == TRANSCODE_PREMATURE_END_OF_BUFFER) { \ if(!(env)->buffer_done) return PREMATURE_END_OF_BUFFER; \ - goto LINE_ENDINGS_INC; \ + goto LINE_ENDINGS_INC ## label; \ } \ - else if(next_char != '\n') goto LINE_ENDINGS_INC; \ - goto LINE_ENDINGS_END; \ + else if(next_char != '\n') goto LINE_ENDINGS_INC ## label; \ + goto LINE_ENDINGS_END ## label; \ } \ case '\n': \ -LINE_ENDINGS_INC: \ 
+LINE_ENDINGS_INC ## label: \ (env)->line += 1; \ (env)->column = (unsigned int)-1; \ -LINE_ENDINGS_END: +LINE_ENDINGS_END ## label: +#define LINE_ENDINGS LINE_ENDINGS_LABEL(0) + /* * [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-06 02:04:04 UTC (rev 40) +++ trunk/faxpp/src/xml_parser.c 2008-03-11 17:28:12 UTC (rev 41) @@ -182,7 +182,10 @@ FAXPP_set_tokenizer_decode(parser->tenv, decode); parser->tenv->user_provided_decode = 1; if(parser->next_event == nc_unsupported_encoding_next_event) { - parser->next_event =... [truncated message content] |
From: <jp...@us...> - 2008-03-06 02:04:02
|
Revision: 40 http://faxpp.svn.sourceforge.net/faxpp/?rev=40&view=rev Author: jpcs Date: 2008-03-05 18:04:04 -0800 (Wed, 05 Mar 2008) Log Message: ----------- Implemented a user specified base URI for the files being parsed, so that the entity callback can keep track of the origin of the entity declaration. Implemented the parsing of external subsets (DTDs). "public" is a keyword in C++ - renamed "public" to "public_id" and "system" to "system_id". Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/examples/parser_example.c trunk/faxpp/include/faxpp/event.h trunk/faxpp/include/faxpp/parser.h trunk/faxpp/include/faxpp/transcode.h trunk/faxpp/src/char_classes.h trunk/faxpp/src/doctype.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c trunk/faxpp/tests/xmlconf_runner.c Added Paths: ----------- trunk/faxpp/examples/entity_resolver.c trunk/faxpp/examples/entity_resolver.h trunk/faxpp/examples/output_event.c trunk/faxpp/examples/output_event.h Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/Makefile.am 2008-03-06 02:04:04 UTC (rev 40) @@ -38,14 +38,20 @@ src/notationdecl.c \ src/entitydecl.c +tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c -tokenizer_example_LDADD = libfaxpp.la -parser_example_SOURCES = examples/parser_example.c parser_example_LDADD = libfaxpp.la +parser_example_SOURCES = \ +examples/parser_example.c \ +examples/entity_resolver.c \ +examples/output_event.c -xmlconf_runner_SOURCES = tests/xmlconf_runner.c xmlconf_runner_LDADD = libfaxpp.la +xmlconf_runner_SOURCES = \ +tests/xmlconf_runner.c \ +examples/entity_resolver.c \ +examples/output_event.c EXTRA_DIST = \ 
docs/Doxyfile.api \ Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/Makefile.in 2008-03-06 02:04:04 UTC (rev 40) @@ -70,13 +70,15 @@ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ $(libfaxpp_la_LDFLAGS) $(LDFLAGS) -o $@ PROGRAMS = $(noinst_PROGRAMS) -am_parser_example_OBJECTS = parser_example.$(OBJEXT) +am_parser_example_OBJECTS = parser_example.$(OBJEXT) \ + entity_resolver.$(OBJEXT) output_event.$(OBJEXT) parser_example_OBJECTS = $(am_parser_example_OBJECTS) parser_example_DEPENDENCIES = libfaxpp.la am_tokenizer_example_OBJECTS = tokenizer_example.$(OBJEXT) tokenizer_example_OBJECTS = $(am_tokenizer_example_OBJECTS) tokenizer_example_DEPENDENCIES = libfaxpp.la -am_xmlconf_runner_OBJECTS = xmlconf_runner.$(OBJEXT) +am_xmlconf_runner_OBJECTS = xmlconf_runner.$(OBJEXT) \ + entity_resolver.$(OBJEXT) output_event.$(OBJEXT) xmlconf_runner_OBJECTS = $(am_xmlconf_runner_OBJECTS) xmlconf_runner_DEPENDENCIES = libfaxpp.la DEFAULT_INCLUDES = -I. 
-I$(top_builddir)/src@am__isrc@ @@ -259,12 +261,20 @@ src/notationdecl.c \ src/entitydecl.c +tokenizer_example_LDADD = libfaxpp.la tokenizer_example_SOURCES = examples/tokenizer_example.c -tokenizer_example_LDADD = libfaxpp.la -parser_example_SOURCES = examples/parser_example.c parser_example_LDADD = libfaxpp.la -xmlconf_runner_SOURCES = tests/xmlconf_runner.c +parser_example_SOURCES = \ +examples/parser_example.c \ +examples/entity_resolver.c \ +examples/output_event.c + xmlconf_runner_LDADD = libfaxpp.la +xmlconf_runner_SOURCES = \ +tests/xmlconf_runner.c \ +examples/entity_resolver.c \ +examples/output_event.c + EXTRA_DIST = \ docs/Doxyfile.api \ docs/header.html \ @@ -387,10 +397,12 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entity_resolver.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/output_event.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parser_example.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reference.Plo@am__quote@ @@ -578,6 +590,34 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o parser_example.obj `if test -f 'examples/parser_example.c'; then $(CYGPATH_W) 'examples/parser_example.c'; else $(CYGPATH_W) '$(srcdir)/examples/parser_example.c'; fi` 
+entity_resolver.o: examples/entity_resolver.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entity_resolver.o -MD -MP -MF $(DEPDIR)/entity_resolver.Tpo -c -o entity_resolver.o `test -f 'examples/entity_resolver.c' || echo '$(srcdir)/'`examples/entity_resolver.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entity_resolver.Tpo $(DEPDIR)/entity_resolver.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/entity_resolver.c' object='entity_resolver.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entity_resolver.o `test -f 'examples/entity_resolver.c' || echo '$(srcdir)/'`examples/entity_resolver.c + +entity_resolver.obj: examples/entity_resolver.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entity_resolver.obj -MD -MP -MF $(DEPDIR)/entity_resolver.Tpo -c -o entity_resolver.obj `if test -f 'examples/entity_resolver.c'; then $(CYGPATH_W) 'examples/entity_resolver.c'; else $(CYGPATH_W) '$(srcdir)/examples/entity_resolver.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entity_resolver.Tpo $(DEPDIR)/entity_resolver.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/entity_resolver.c' object='entity_resolver.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entity_resolver.obj `if test -f 'examples/entity_resolver.c'; then $(CYGPATH_W) 'examples/entity_resolver.c'; else $(CYGPATH_W) '$(srcdir)/examples/entity_resolver.c'; fi` + +output_event.o: examples/output_event.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT output_event.o -MD -MP -MF $(DEPDIR)/output_event.Tpo -c -o output_event.o `test -f 'examples/output_event.c' || echo '$(srcdir)/'`examples/output_event.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/output_event.Tpo $(DEPDIR)/output_event.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/output_event.c' object='output_event.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o output_event.o `test -f 'examples/output_event.c' || echo '$(srcdir)/'`examples/output_event.c + +output_event.obj: examples/output_event.c +@am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT output_event.obj -MD -MP -MF $(DEPDIR)/output_event.Tpo -c -o output_event.obj `if test -f 'examples/output_event.c'; then $(CYGPATH_W) 'examples/output_event.c'; else $(CYGPATH_W) '$(srcdir)/examples/output_event.c'; fi` +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/output_event.Tpo $(DEPDIR)/output_event.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='examples/output_event.c' object='output_event.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o output_event.obj `if test -f 'examples/output_event.c'; then $(CYGPATH_W) 'examples/output_event.c'; else $(CYGPATH_W) '$(srcdir)/examples/output_event.c'; fi` + tokenizer_example.o: examples/tokenizer_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tokenizer_example.o -MD -MP -MF $(DEPDIR)/tokenizer_example.Tpo -c -o tokenizer_example.o `test -f 
'examples/tokenizer_example.c' || echo '$(srcdir)/'`examples/tokenizer_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/tokenizer_example.Tpo $(DEPDIR)/tokenizer_example.Po Added: trunk/faxpp/examples/entity_resolver.c =================================================================== --- trunk/faxpp/examples/entity_resolver.c (rev 0) +++ trunk/faxpp/examples/entity_resolver.c 2008-03-06 02:04:04 UTC (rev 40) @@ -0,0 +1,73 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "entity_resolver.h" + +char *resolve_paths(const char *base, unsigned int base_len, const char *path, unsigned int path_len) +{ + char *result = malloc(base_len + path_len + 1); + char *ptr = result; + + strncpy(ptr, base, base_len); + ptr += base_len - 1; + + while(ptr >= result && *ptr != '/') { + --ptr; + } + ++ptr; + + strncpy(ptr, path, path_len); + ptr += path_len; + *ptr = 0; + + return result; +} + +unsigned int file_read_callback(void *userData, void *buffer, unsigned int length) +{ + unsigned int result = fread(buffer, 1, length, (FILE*)userData); + if(result < length) { + fclose((FILE*)userData); + } + return result; +} + +FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type, + const FAXPP_Text *base_uri, const FAXPP_Text *system, const FAXPP_Text *public) +{ + FAXPP_Error err; + FILE *file; + char *path; + + path = resolve_paths((char*)base_uri->ptr, base_uri->len, (char*)system->ptr, system->len); + + file = fopen(path, "r"); + if(file == 0) { + printf("Open of '%s' failed: %s\n", path, strerror(errno)); + return CANT_LOCATE_EXTERNAL_ENTITY; + } + + err = FAXPP_parse_external_entity_callback(parser, type, file_read_callback, file); + if(err == NO_ERROR) + err = FAXPP_set_base_uri_str(parser, path); + + free(path); + return err; +} Added: trunk/faxpp/examples/entity_resolver.h =================================================================== --- trunk/faxpp/examples/entity_resolver.h (rev 0) +++ trunk/faxpp/examples/entity_resolver.h 2008-03-06 02:04:04 UTC (rev 40) @@ -0,0 +1,27 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENTITY_RESOLVER_H +#define ENTITY_RESOLVER_H + +#include <faxpp/parser.h> + +char *resolve_paths(const char *base, unsigned int base_len, const char *path, unsigned int path_len); +unsigned int file_read_callback(void *userData, void *buffer, unsigned int length); +FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type, + const FAXPP_Text *base_uri, const FAXPP_Text *system, const FAXPP_Text *public); + +#endif Added: trunk/faxpp/examples/output_event.c =================================================================== --- trunk/faxpp/examples/output_event.c (rev 0) +++ trunk/faxpp/examples/output_event.c 2008-03-06 02:04:04 UTC (rev 40) @@ -0,0 +1,264 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* #include <stdio.h> */ +/* #include <stdlib.h> */ +/* #include <errno.h> */ +/* #include <string.h> */ + +#include "output_event.h" + +#define SHOW_URIS 0 +#define SHOW_ENTITIES 0 + +void +output_text(const FAXPP_Text *text, FILE *stream) +{ + char *buffer = (char*)text->ptr; + char *buffer_end = buffer + text->len; + + while(buffer < buffer_end) { + putc(*buffer++, stream); + } +} + +void +output_escaped_attr_text(const FAXPP_Text *text, FILE *stream) +{ + char *buffer = (char*)text->ptr; + char *buffer_end = buffer + text->len; + + while(buffer < buffer_end) { + if(*buffer == '&') { + fprintf(stream, "&amp;"); + } + else if(*buffer == '<') { + fprintf(stream, "&lt;"); + } + else if(*buffer == '"') { + fprintf(stream, "&quot;"); + } + else { + putc(*buffer, stream); + } + ++buffer; + } +} + +void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream) +{ + while(atval) { + switch(atval->type) { + case CHARACTERS_EVENT: + output_escaped_attr_text(&atval->value, stream); + break; + case ENTITY_REFERENCE_EVENT: + fprintf(stream, "&"); + output_text(&atval->name, stream); + fprintf(stream, ";"); + break; + case DEC_CHAR_REFERENCE_EVENT: + fprintf(stream, "&#"); + output_text(&atval->name, stream); + fprintf(stream, ";"); + break; + case HEX_CHAR_REFERENCE_EVENT: + fprintf(stream, "&#x"); + output_text(&atval->name, stream); + fprintf(stream, ";"); + break; + case ENTITY_REFERENCE_START_EVENT: +#if SHOW_ENTITIES + fprintf(stream, "&"); + output_text(&atval->name, stream); + fprintf(stream, ";("); +#endif + break; + case ENTITY_REFERENCE_END_EVENT: +#if SHOW_ENTITIES + fprintf(stream, ")"); +#endif + break; + default: + break; + } + atval = atval->next; + } +} + +void +output_event(const FAXPP_Event *event, FILE *stream) +{ + int i; + + switch(event->type) { + case START_DOCUMENT_EVENT: + if(event->version.ptr != 0) { + fprintf(stream, "<?xml version=\""); + output_text(&event->version, stream); + if(event->encoding.ptr != 0) { + fprintf(stream, "\" encoding=\""); + 
output_text(&event->encoding, stream); + } + if(event->standalone.ptr != 0) { + fprintf(stream, "\" standalone=\""); + output_text(&event->standalone, stream); + } + fprintf(stream, "\"?>"); + } + break; + case END_DOCUMENT_EVENT: + break; + case DOCTYPE_EVENT: + fprintf(stream, "<!DOCTYPE "); + + if(event->prefix.ptr != 0) { + output_text(&event->prefix, stream); + fprintf(stream, ":"); + } + output_text(&event->name, stream); + + if(event->system_id.ptr != 0) { + if(event->public_id.ptr != 0) { + fprintf(stream, " PUBLIC \""); + output_text(&event->public_id, stream); + fprintf(stream, "\" \""); + output_text(&event->system_id, stream); + fprintf(stream, "\""); + } + else { + fprintf(stream, " SYSTEM \""); + output_text(&event->system_id, stream); + fprintf(stream, "\""); + } + } + fprintf(stream, ">"); + break; + case START_ELEMENT_EVENT: + case SELF_CLOSING_ELEMENT_EVENT: + fprintf(stream, "<"); +#if SHOW_URIS + if(event->uri.ptr != 0) { + fprintf(stream, "{"); + output_text(&event->uri, stream); + fprintf(stream, "}"); + } else +#endif + if(event->prefix.ptr != 0) { + output_text(&event->prefix, stream); + fprintf(stream, ":"); + } + output_text(&event->name, stream); + + for(i = 0; i < event->attr_count; ++i) { + fprintf(stream, " "); +#if SHOW_URIS + if(event->attrs[i].uri.ptr != 0) { + fprintf(stream, "{"); + output_text(&event->attrs[i].uri, stream); + fprintf(stream, "}"); + } else +#endif + if(event->attrs[i].prefix.ptr != 0) { + output_text(&event->attrs[i].prefix, stream); + fprintf(stream, ":"); + } + output_text(&event->attrs[i].name, stream); + fprintf(stream, "=\""); + output_attr_value(&event->attrs[i].value, stream); + fprintf(stream, "\""); + } + + if(event->type == SELF_CLOSING_ELEMENT_EVENT) + fprintf(stream, "/>"); + else + fprintf(stream, ">"); + break; + case END_ELEMENT_EVENT: + fprintf(stream, "</"); +#if SHOW_URIS + if(event->uri.ptr != 0) { + fprintf(stream, "{"); + output_text(&event->uri, stream); + fprintf(stream, "}"); + } else 
+#endif + if(event->prefix.ptr != 0) { + output_text(&event->prefix, stream); + fprintf(stream, ":"); + } + output_text(&event->name, stream); + fprintf(stream, ">"); + break; + case CHARACTERS_EVENT: + output_text(&event->value, stream); + break; + case CDATA_EVENT: + fprintf(stream, "<![CDATA["); + output_text(&event->value, stream); + fprintf(stream, "]]>"); + break; + case IGNORABLE_WHITESPACE_EVENT: + output_text(&event->value, stream); + break; + case COMMENT_EVENT: + fprintf(stream, "<!--"); + output_text(&event->value, stream); + fprintf(stream, "-->"); + break; + case PI_EVENT: + fprintf(stream, "<?"); + output_text(&event->name, stream); + if(event->value.ptr != 0) { + fprintf(stream, " "); + output_text(&event->value, stream); + } + fprintf(stream, "?>"); + break; + case ENTITY_REFERENCE_EVENT: + fprintf(stream, "&"); + output_text(&event->name, stream); + fprintf(stream, ";"); + break; + case DEC_CHAR_REFERENCE_EVENT: + fprintf(stream, "&#"); + output_text(&event->name, stream); + fprintf(stream, ";"); + break; + case HEX_CHAR_REFERENCE_EVENT: + fprintf(stream, "&#x"); + output_text(&event->name, stream); + fprintf(stream, ";"); + break; + case ENTITY_REFERENCE_START_EVENT: +#if SHOW_ENTITIES + fprintf(stream, "&"); + output_text(&event->name, stream); + fprintf(stream, ";("); +#endif + break; + case ENTITY_REFERENCE_END_EVENT: +#if SHOW_ENTITIES + fprintf(stream, ")"); +#endif + break; + case START_EXTERNAL_ENTITY_EVENT: + case END_EXTERNAL_ENTITY_EVENT: + case NO_EVENT: + break; + } +} + Added: trunk/faxpp/examples/output_event.h =================================================================== --- trunk/faxpp/examples/output_event.h (rev 0) +++ trunk/faxpp/examples/output_event.h 2008-03-06 02:04:04 UTC (rev 40) @@ -0,0 +1,27 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OUTPUT_EVENT_H +#define OUTPUT_EVENT_H + +#include <stdio.h> + +#include <faxpp/parser.h> + +void output_event(const FAXPP_Event *event, FILE *stream); +void output_attr_value(const FAXPP_AttrValue *atval, FILE *stream); + +#endif Modified: trunk/faxpp/examples/parser_example.c =================================================================== --- trunk/faxpp/examples/parser_example.c 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/examples/parser_example.c 2008-03-06 02:04:04 UTC (rev 40) @@ -21,6 +21,8 @@ #include <sys/time.h> #include <faxpp/parser.h> +#include "entity_resolver.h" +#include "output_event.h" #define BUFFER_SIZE 10 #define MSECS_IN_SECS 1000000 @@ -33,294 +35,6 @@ return (timev.tv_sec * MSECS_IN_SECS) + timev.tv_usec; } -void -output_text(const FAXPP_Text *text, FILE *stream) -{ - char *buffer = (char*)text->ptr; - char *buffer_end = buffer + text->len; - - while(buffer < buffer_end) { - putc(*buffer++, stream); - } -} - -void -output_escaped_attr_text(const FAXPP_Text *text, FILE *stream) -{ - char *buffer = (char*)text->ptr; - char *buffer_end = buffer + text->len; - - while(buffer < buffer_end) { - if(*buffer == '&') { - fprintf(stream, "&amp;"); - } - else if(*buffer == '<') { - fprintf(stream, "&lt;"); - } - else if(*buffer == '"') { - fprintf(stream, "&quot;"); - } - else { - putc(*buffer, stream); - } - ++buffer; - } -} - -#define SHOW_URIS 0 -#define SHOW_ENTITIES 0 - -void -output_event(const FAXPP_Event *event, FILE *stream) -{ - int i; - FAXPP_AttrValue *atval; - - switch(event->type) { - case 
START_DOCUMENT_EVENT: - if(event->version.ptr != 0) { - fprintf(stream, "<?xml version=\""); - output_text(&event->version, stream); - if(event->encoding.ptr != 0) { - fprintf(stream, "\" encoding=\""); - output_text(&event->encoding, stream); - } - if(event->standalone.ptr != 0) { - fprintf(stream, "\" standalone=\""); - output_text(&event->standalone, stream); - } - fprintf(stream, "\"?>"); - } - break; - case END_DOCUMENT_EVENT: - break; - case DOCTYPE_EVENT: - fprintf(stream, "<!DOCTYPE "); - - if(event->prefix.ptr != 0) { - output_text(&event->prefix, stream); - fprintf(stream, ":"); - } - output_text(&event->name, stream); - - if(event->system.ptr != 0) { - if(event->public.ptr != 0) { - fprintf(stream, " PUBLIC \""); - output_text(&event->public, stream); - fprintf(stream, "\" \""); - output_text(&event->system, stream); - fprintf(stream, "\""); - } - else { - fprintf(stream, " SYSTEM \""); - output_text(&event->system, stream); - fprintf(stream, "\""); - } - } - fprintf(stream, ">"); - break; - case START_ELEMENT_EVENT: - case SELF_CLOSING_ELEMENT_EVENT: - fprintf(stream, "<"); -#if SHOW_URIS - if(event->uri.ptr != 0) { - fprintf(stream, "{"); - output_text(&event->uri, stream); - fprintf(stream, "}"); - } else -#endif - if(event->prefix.ptr != 0) { - output_text(&event->prefix, stream); - fprintf(stream, ":"); - } - output_text(&event->name, stream); - - for(i = 0; i < event->attr_count; ++i) { - fprintf(stream, " "); -#if SHOW_URIS - if(event->attrs[i].uri.ptr != 0) { - fprintf(stream, "{"); - output_text(&event->attrs[i].uri, stream); - fprintf(stream, "}"); - } else -#endif - if(event->attrs[i].prefix.ptr != 0) { - output_text(&event->attrs[i].prefix, stream); - fprintf(stream, ":"); - } - output_text(&event->attrs[i].name, stream); - fprintf(stream, "=\""); - - atval = &event->attrs[i].value; - while(atval) { - switch(atval->type) { - case CHARACTERS_EVENT: - output_escaped_attr_text(&atval->value, stream); - break; - case ENTITY_REFERENCE_EVENT: - 
fprintf(stream, "&"); - output_text(&atval->name, stream); - fprintf(stream, ";"); - break; - case DEC_CHAR_REFERENCE_EVENT: - fprintf(stream, "&#"); - output_text(&atval->name, stream); - fprintf(stream, ";"); - break; - case HEX_CHAR_REFERENCE_EVENT: - fprintf(stream, "&#x"); - output_text(&atval->name, stream); - fprintf(stream, ";"); - break; - case ENTITY_REFERENCE_START_EVENT: -#if SHOW_ENTITIES - fprintf(stream, "&"); - output_text(&atval->name, stream); - fprintf(stream, ";("); -#endif - break; - case ENTITY_REFERENCE_END_EVENT: -#if SHOW_ENTITIES - fprintf(stream, ")"); -#endif - break; - default: - break; - } - atval = atval->next; - } - - fprintf(stream, "\""); - } - - if(event->type == SELF_CLOSING_ELEMENT_EVENT) - fprintf(stream, "/>"); - else - fprintf(stream, ">"); - break; - case END_ELEMENT_EVENT: - fprintf(stream, "</"); -#if SHOW_URIS - if(event->uri.ptr != 0) { - fprintf(stream, "{"); - output_text(&event->uri, stream); - fprintf(stream, "}"); - } else -#endif - if(event->prefix.ptr != 0) { - output_text(&event->prefix, stream); - fprintf(stream, ":"); - } - output_text(&event->name, stream); - fprintf(stream, ">"); - break; - case CHARACTERS_EVENT: - output_text(&event->value, stream); - break; - case CDATA_EVENT: - fprintf(stream, "<![CDATA["); - output_text(&event->value, stream); - fprintf(stream, "]]>"); - break; - case IGNORABLE_WHITESPACE_EVENT: - output_text(&event->value, stream); - break; - case COMMENT_EVENT: - fprintf(stream, "<!--"); - output_text(&event->value, stream); - fprintf(stream, "-->"); - break; - case PI_EVENT: - fprintf(stream, "<?"); - output_text(&event->name, stream); - if(event->value.ptr != 0) { - fprintf(stream, " "); - output_text(&event->value, stream); - } - fprintf(stream, "?>"); - break; - case ENTITY_REFERENCE_EVENT: - fprintf(stream, "&"); - output_text(&event->name, stream); - fprintf(stream, ";"); - break; - case DEC_CHAR_REFERENCE_EVENT: - fprintf(stream, "&#"); - output_text(&event->name, stream); - 
fprintf(stream, ";"); - break; - case HEX_CHAR_REFERENCE_EVENT: - fprintf(stream, "&#x"); - output_text(&event->name, stream); - fprintf(stream, ";"); - break; - case ENTITY_REFERENCE_START_EVENT: -#if SHOW_ENTITIES - fprintf(stream, "&"); - output_text(&event->name, stream); - fprintf(stream, ";("); -#endif - break; - case ENTITY_REFERENCE_END_EVENT: -#if SHOW_ENTITIES - fprintf(stream, ")"); -#endif - break; - case START_EXTERNAL_ENTITY_EVENT: - case END_EXTERNAL_ENTITY_EVENT: - case NO_EVENT: - break; - } -} - -char *resolve_paths(const char *base, const char *path, unsigned int path_len) -{ - unsigned int base_len = strlen(base); - - char *result = malloc(base_len + path_len + 1); - char *ptr = result; - - strcpy(ptr, base); - ptr += base_len - 1; - - while(ptr >= result && *ptr != '/') { - --ptr; - } - ++ptr; - - strncpy(ptr, path, path_len); - ptr += path_len; - *ptr = 0; - - return result; -} - -static unsigned int file_read_callback(void *userData, void *buffer, unsigned int length) -{ - unsigned int result = fread(buffer, 1, length, (FILE*)userData); - if(result < length) { - fclose((FILE*)userData); - } - return result; -} - -static FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, - const FAXPP_Text *system, const FAXPP_Text *public) -{ - FILE *file; - char *path; - - path = resolve_paths((char*)userData, (char*)system->ptr, system->len); - file = fopen(path, "r"); - if(file == 0) { - printf("Open of '%s' failed: %s\n", path, strerror(errno)); - return CANT_LOCATE_EXTERNAL_ENTITY; - } - free(path); - - return FAXPP_parse_external_entity_callback(parser, file_read_callback, file); -} - int main(int argc, char **argv) { @@ -342,7 +56,7 @@ for(i = 1; i < argc; ++i) { - FAXPP_set_external_entity_callback(parser, entity_callback, argv[i]); + FAXPP_set_external_entity_callback(parser, entity_callback, 0); startTime = getTime(); @@ -353,6 +67,9 @@ } err = FAXPP_init_parse_callback(parser, file_read_callback, file); + if(err == NO_ERROR) + err = 
FAXPP_set_base_uri_str(parser, argv[i]); + if(err != NO_ERROR) { printf("ERROR: %s\n", FAXPP_err_to_string(err)); exit(1); Modified: trunk/faxpp/include/faxpp/event.h =================================================================== --- trunk/faxpp/include/faxpp/event.h 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/include/faxpp/event.h 2008-03-06 02:04:04 UTC (rev 40) @@ -99,8 +99,8 @@ FAXPP_Text encoding; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT FAXPP_Text standalone; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type - FAXPP_Text system; ///< The system literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT - FAXPP_Text public; ///< The public ID literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT + FAXPP_Text system_id; ///< The system literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT + FAXPP_Text public_id; ///< The public ID literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT unsigned int line; ///< The line number of the start of the event unsigned int column; ///< The column number of the start of the event Modified: trunk/faxpp/include/faxpp/parser.h =================================================================== --- trunk/faxpp/include/faxpp/parser.h 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/include/faxpp/parser.h 2008-03-06 02:04:04 UTC (rev 40) @@ -90,6 +90,12 @@ WELL_FORMED_PARSE_MODE } FAXPP_ParseMode; +/// The type of external entity to parse +typedef enum { + EXTERNAL_PARSED_ENTITY = 0, ///< An external parsed entity + EXTERNAL_SUBSET_ENTITY = 1 ///< An external subset (DTD) +} FAXPP_EntityType; + /** * The function called when faxpp recieves a PREMATURE_END_OF_BUFFER error from the * tokenizer. 
The function should read the next chunk of input into the buffer provided, @@ -105,20 +111,23 @@ /** * The function called when faxpp finds a reference to an external parsed entity. The function should - * lcoate the entity using it's system and public indentifiers and call FAXPP_parse_external_entity(), + * locate the entity using it's system and public indentifiers and call FAXPP_parse_external_entity(), * FAXPP_parse_external_entity_callback() or FAXPP_parse_external_entity_file() to parse the external - * entity. + * entity. The base URI provided is the one supplied by the user using FAXPP_set_base_uri() or + * FAXPP_set_base_uri_str() for the file that the entity declaration was in. * * \param userData The user data supplied to the FAXPP_set_external_entity_callback() method * \param parser A pointer to the parser - * \param system The entity's system identifier - * \param public The entity's public identifier + * \param type The type of external entity to locate + * \param base_uri The base URI for the entity declaration + * \param system_id The entity's system identifier + * \param public_id The entity's public identifier * * \return NO_ERROR on success, DONT_PARSE_EXTERNAL_ENTITY to return an unexpanded ENTITY_REFERENCE_EVENT * event, otherwise another error code to halt parsing (most probably CANT_LOCATE_EXTERNAL_ENTITY). */ -typedef FAXPP_Error (*FAXPP_ExternalEntityCallback)(void *userData, FAXPP_Parser *parser, - const FAXPP_Text *system, const FAXPP_Text *public); +typedef FAXPP_Error (*FAXPP_ExternalEntityCallback)(void *userData, FAXPP_Parser *parser, FAXPP_EntityType type, + const FAXPP_Text *base_uri, const FAXPP_Text *system_id, const FAXPP_Text *public_id); /** * Creates a parser object @@ -221,6 +230,56 @@ void FAXPP_set_decode(FAXPP_Parser *parser, FAXPP_DecodeFunction decode); /** + * Gets the base URI for the file currently being parsed. This is set by the user + * using FAXPP_set_base_uri() or FAXPP_set_base_uri_str(). 
+ * + * The base URI returned will be in the encoding that it was provided in to the + * FAXPP_set_base_uri() function. + * + * \param parser + * \return The current base URI + * + * \relatesalso FAXPP_Parser + */ +const FAXPP_Text *FAXPP_get_base_uri(const FAXPP_Parser *parser); + +/** + * Sets the base URI for the file currently being parsed. This is passed to the + * FAXPP_ExternalEntityCallback set using FAXPP_set_external_entity_callback(). + * + * A copy of the base_uri will be kept internally, so neither the FAXPP_Text object + * nor the buffer it points to need exist after a call to FAXPP_set_base_uri(). The + * encoding of the base URI is irrelevent to FAXPP - the base URI will be in the + * same encoding when it is passed back to the FAXPP_ExternalEntityCallback. + * + * \param parser + * \param base_uri The base URI + * + * \retval OUT_OF_MEMORY + * \retval NO_ERROR + * + * \relatesalso FAXPP_Parser + */ +FAXPP_Error FAXPP_set_base_uri(FAXPP_Parser *parser, const FAXPP_Text *base_uri); + +/** + * Sets the base URI for the file currently being parsed. This is passed to the + * FAXPP_ExternalEntityCallback set using FAXPP_set_external_entity_callback(). + * + * A copy of the base_uri will be kept internally, so the string need not exist + * after a call to FAXPP_set_base_uri_str(). + * + * \param parser + * \param base_uri The base URI + * + * \retval OUT_OF_MEMORY + * \retval NO_ERROR + * + * \relatesalso FAXPP_Parser + */ +FAXPP_Error FAXPP_set_base_uri_str(FAXPP_Parser *parser, const char *base_uri); + +/** * Sets the FAXPP_ExternalEntityCallback that the parser will call when it * encounters a reference to an external parsed entity. * @@ -302,6 +361,7 @@ * responsible for deleting the buffer. 
* * \param parser The parser to use + * \param type The type of external entity to parse * \param buffer A pointer to the start of the buffer to parse * \param length The length of the given buffer * \param done Set to non-zero if this is the last buffer from the external entity @@ -313,7 +373,7 @@ * * \relatesalso FAXPP_Parser */ -FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *parser, void *buffer, unsigned int length, unsigned int done); +FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *parser, FAXPP_EntityType type, void *buffer, unsigned int length, unsigned int done); /** * Interrupts parsing to parse the external entity from the given file. Any parsing @@ -326,6 +386,7 @@ * The user remains responsible for closing the file after parsing has ended. * * \param parser The parser to initialize + * \param type The type of external entity to parse * \param file The file descriptor of the file to parse * * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize @@ -335,7 +396,7 @@ * * \relatesalso FAXPP_Parser */ -FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *parser, FILE *file); +FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *parser, FAXPP_EntityType type, FILE *file); /** * Interrupts parsing to parse the external entity using the given read callback. Any parsing @@ -345,6 +406,7 @@ * to. 
* * \param parser The parser to initialize + * \param type The type of external entity to parse * \param callback The read callback function to use to retrieve the parse input * \param userData The user data to be passed to the callback function when it is called * @@ -355,7 +417,7 @@ * * \relatesalso FAXPP_Parser */ -FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData); +FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *parser, FAXPP_EntityType type, FAXPP_ReadCallback callback, void *userData); /** * Instructs the parser to release any dependencies it has on it's current buffer. Modified: trunk/faxpp/include/faxpp/transcode.h =================================================================== --- trunk/faxpp/include/faxpp/transcode.h 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/include/faxpp/transcode.h 2008-03-06 02:04:04 UTC (rev 40) @@ -80,13 +80,13 @@ * * \see FAXPP_utf8_decode, FAXPP_utf8_encode */ -const FAXPP_Transcoder FAXPP_utf8_transcoder; +extern const FAXPP_Transcoder FAXPP_utf8_transcoder; /** * Transcoder to and from native endian UTF-16 * * \see FAXPP_utf16_native_decode, FAXPP_utf16_native_encode */ -const FAXPP_Transcoder FAXPP_utf16_native_transcoder; +extern const FAXPP_Transcoder FAXPP_utf16_native_transcoder; /** * Returns a string describing the given (built-in) decode function Modified: trunk/faxpp/src/char_classes.h =================================================================== --- trunk/faxpp/src/char_classes.h 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/src/char_classes.h 2008-03-06 02:04:04 UTC (rev 40) @@ -34,6 +34,6 @@ #define FAXPP_char_flags(ch) (((ch) < 256) ? 
FAXPP_char_flags_256[(ch)] : FAXPP_char_flags_impl((ch))) uint8_t FAXPP_char_flags_impl(Char32 c); -const uint8_t FAXPP_char_flags_256[256]; +extern const uint8_t FAXPP_char_flags_256[256]; #endif Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/src/doctype.c 2008-03-06 02:04:04 UTC (rev 40) @@ -101,7 +101,7 @@ return NO_ERROR; case '[': env->state = internal_subset_state; - env->in_internal_subset = 1; + env->internal_subset = 1; token_end_position(env); report_token(DOCTYPE_NAME_TOKEN, env); next_char(env); @@ -165,7 +165,7 @@ return NO_ERROR; case '[': env->state = internal_subset_state; - env->in_internal_subset = 1; + env->internal_subset = 1; token_end_position(env); report_token(DOCTYPE_NAME_TOKEN, env); next_char(env); @@ -207,7 +207,7 @@ break; case '[': env->state = internal_subset_state; - env->in_internal_subset = 1; + env->internal_subset = 1; break; case '>': base_state(env); @@ -471,7 +471,7 @@ break; case '[': env->state = internal_subset_state; - env->in_internal_subset = 1; + env->internal_subset = 1; break; case '>': base_state(env); @@ -495,7 +495,7 @@ switch(env->current_char) { case ']': env->state = doctype_end_state; - env->in_internal_subset = 0; + env->internal_subset = 0; break; WHITESPACE: break; @@ -617,3 +617,84 @@ return NO_ERROR; } +FAXPP_Error +external_subset_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + break; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + env->state = external_subset_markup_state; + break; + default: + next_char(env); + return INVALID_DOCTYPE_DECL; + } + + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +external_subset_markup_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + 
case '?': + env->state = pi_name_start_state; + break; + case '!': + env->state = external_subset_decl_state; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DTD_DECL; + } + + next_char(env); + token_start_position(env); + return NO_ERROR; +} + +FAXPP_Error +external_subset_decl_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '-': + env->state = comment_start_state2; + break; +/* // TBD conditional sections - jpcs */ +/* case '[': */ +/* break; */ + case 'E': + env->state = elementdecl_or_entitydecl_state; + break; + case 'A': + env->state = attlistdecl_initial_state1; + break; + case 'N': + env->state = notationdecl_initial_state1; + break; + LINE_ENDINGS + default: + env->state = comment_content_state; + token_start_position(env); + next_char(env); + return INVALID_START_OF_COMMENT; + } + next_char(env); + return NO_ERROR; +} + Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/src/tokenizer_states.c 2008-03-06 02:04:04 UTC (rev 40) @@ -421,6 +421,9 @@ return "doctype_after_name_state"; else if(state == doctype_internal_subset_start_state) return "doctype_internal_subset_start_state"; + else if(state == doctype_end_state) + return "doctype_end_state"; + else if(state == internal_subset_state) return "internal_subset_state"; else if(state == internal_subset_state_en) @@ -429,9 +432,14 @@ return "internal_subset_markup_state"; else if(state == internal_subset_decl_state) return "internal_subset_decl_state"; - else if(state == doctype_end_state) - return "doctype_end_state"; + else if(state == external_subset_state) + return "external_subset_state"; + else if(state == external_subset_markup_state) + return "external_subset_markup_state"; + else if(state == external_subset_decl_state) + return "external_subset_decl_state"; + else if(state == 
system_id_initial_state1) return "system_id_initial_state1"; else if(state == system_id_initial_state2) @@ -668,20 +676,14 @@ switch(env->current_char) { case '<': env->state = xml_decl_or_markup_state; + next_char(env); break; - WHITESPACE: - base_state(env); - break; default: base_state(env); - if(!env->external_parsed_entity) { - next_char(env); - return NON_WHITESPACE_OUTSIDE_DOC_ELEMENT; - } + // No next_char break; } - next_char(env); return NO_ERROR; } Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/src/tokenizer_states.h 2008-03-06 02:04:04 UTC (rev 40) @@ -258,12 +258,17 @@ FAXPP_Error doctype_name_seen_colon_state2(FAXPP_TokenizerEnv *env); FAXPP_Error doctype_after_name_state(FAXPP_TokenizerEnv *env); FAXPP_Error doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env); +FAXPP_Error doctype_end_state(FAXPP_TokenizerEnv *env); + FAXPP_Error internal_subset_state(FAXPP_TokenizerEnv *env); FAXPP_Error internal_subset_state_en(FAXPP_TokenizerEnv *env); FAXPP_Error internal_subset_markup_state(FAXPP_TokenizerEnv *env); FAXPP_Error internal_subset_decl_state(FAXPP_TokenizerEnv *env); -FAXPP_Error doctype_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_state(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env); +FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env); + FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error system_id_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error system_id_initial_state3(FAXPP_TokenizerEnv *env); @@ -427,8 +432,10 @@ (env)->state = parsed_entity_state; \ else if((env)->seen_doc_element) \ (env)->state = final_state; \ - else if((env)->in_internal_subset) \ + else if((env)->internal_subset) \ (env)->state = internal_subset_state; \ + else if((env)->external_subset) \ + (env)->state = 
external_subset_state; \ else (env)->state = initial_misc_state; \ } Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-03-03 14:32:32 UTC (rev 39) +++ trunk/faxpp/src/xml_parser.c 2008-03-06 02:04:04 UTC (rev 40) @@ -34,7 +34,7 @@ #define INITIAL_ELEMENT_INFO_BUFFER_SIZE 256 static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env); -static FAXPP_Error nc_doctype_next_event(FAXPP_ParserEnv *env); +static FAXPP_Error nc_dtd_next_event(FAXPP_ParserEnv *env); static FAXPP_Error nc_next_event(FAXPP_ParserEnv *env); static FAXPP_Error nc_unsupported_encoding_next_event(FAXPP_ParserEnv *env); @@ -186,6 +186,45 @@ } } +const FAXPP_Text *FAXPP_get_base_uri(const FAXPP_Parser *env) +{ + FAXPP_TokenizerEnv *tokenizer = env->tenv; + while(tokenizer->prev) { + if(tokenizer->start_of_file) + break; + tokenizer = tokenizer->prev; + } + + return tokenizer->base_uri.ptr == 0 ? 0 : &tokenizer->base_uri; +} + +FAXPP_Error FAXPP_set_base_uri(FAXPP_Parser *env, const FAXPP_Text *base_uri) +{ + FAXPP_Error err; + + FAXPP_TokenizerEnv *tokenizer = env->tenv; + while(tokenizer->prev) { + if(tokenizer->start_of_file) + break; + tokenizer = tokenizer->prev; + } + + tokenizer->base_uri.ptr = env->entity_buffer.cursor; + + err = FAXPP_buffer_append(&env->entity_buffer, base_uri->ptr, base_uri->len); + if(err) return err; + + tokenizer->base_uri.len = env->entity_buffer.cursor - tokenizer->base_uri.ptr; + + return NO_ERROR; +} + +FAXPP_Error FAXPP_set_base_uri_str(FAXPP_Parser *parser, const char *base_uri) +{ + FAXPP_Text text = { (void*)base_uri, strlen(base_uri) }; + return FAXPP_set_base_uri(parser, &text); +} + void FAXPP_set_external_entity_callback(FAXPP_Parser *parser, FAXPP_ExternalEntityCallback callback, void *userData) { parser->external_entity_callback = callback; @@ -275,35 +314,59 @@ return FAXPP_init_tokenize(env->tenv, env->tenv->read_buffer, len, /*done*/len != 
env->tenv->read_buffer_length); } -FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *env, void *buffer, unsigned int length, unsigned int done) +FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *env, FAXPP_EntityType type, void *buffer, unsigned int length, unsigned int done) { - FAXPP_Error err = FAXPP_push_entity_tokenizer(&env->tenv, EXTERNAL_PARSED_ENTITY, buffer, length, done); + FAXPP_Error err = FAXPP_push_entity_tokenizer(&env->tenv, type, buffer, length, done); if(err != 0) return err; - env->tenv->no_pass_on_state = 1; - env->next_event = nc_start_document_next_event; + // Associate it with the relevent FAXPP_EntityInfo object + env->tenv->entity = env->event_entity; + env->tenv->start_of_entity = 1; + env->tenv->start_of_file = 1; + + switch(type) { + case EXTERNAL_PARSED_ENTITY: + env->next_event = nc_start_document_next_event; + break; + case EXTERNAL_SUBSET_ENTITY: + env->next_event = nc_dtd_next_event; + break; + } + return FAXPP_sniff_encoding(env->tenv); } -FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *env, FILE *file) +FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *env, FAXPP_EntityType type, FILE *file) { - return FAXPP_parse_external_entity_callback(env, p_file_read_callback, (void*)file); + return FAXPP_parse_external_entity_callback(env, type, p_file_read_callback, (void*)file); } -FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *env, FAXPP_ReadCallback callback, void *userData) +FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *env, FAXPP_EntityType type, FAXPP_ReadCallback callback, void *userData) { - FAXPP_Error err = FAXPP_push_entity_tokenizer(&env->tenv, EXTERNAL_PARSED_ENTITY, 0, 0, 0); + FAXPP_Error err = FAXPP_push_entity_tokenizer(&env->tenv, type, 0, 0, 0); if(err != 0) return err; err = p_allocate_buffer(env); if(err != 0) return err; + // Associate it with the relevent FAXPP_EntityInfo object + env->tenv->entity = env->event_entity; + env->tenv->read = callback; 
env->tenv->read_user_data = userData; - env->tenv->no_pass_on_state = 1; - env->next_event = nc_start_document_next_event; + env->tenv->start_of_entity = 1; + env->tenv->start_of_file = 1; + switch(type) { + case EXTERNAL_PARSED_ENTITY: + env->next_event = nc_start_document_next_event; + break; + case EXTERNAL_SUBSET_ENTITY: + env->next_event = nc_dtd_next_event; + break; + } + unsigned int len = env->tenv->read(env->tenv->read_user_data, env->tenv->read_buffer, env->tenv->read_buffer_length); err = FAXPP_continue_tokenize(env->tenv, env->tenv->read_buffer, len, /*done*/len != env->tenv->read_buffer_length); @@ -377,8 +440,8 @@ p_text_change_buffer(buffer, newBuffer, &env->event.version); p_text_change_buffer(buffer, newBuffer, &env->event.encoding); p_text_change_buffer(buffer, newBuffer, &env->event.standalone); - p_text_change_buffer(buffer, newBuffer, &env->event.system); - p_text_change_buffer(buffer, newBuffer, &env->event.public); + p_text_change_buffer(buffer, newBuffer, &env->event.system_id); + p_text_change_buffer(buffer, newBuffer, &env->event.public_id); for(i = 0; i < env->event.attr_count; ++i) { p_text_change_buffer(buffer, newBuffer, &env->event.attrs[i].prefix); @@ -397,12 +460,15 @@ { FAXPP_EntityInfo *ent; FAXPP_EntityValue *entv; + FAXPP_TokenizerEnv *tokenizer; FAXPP_ParserEnv *env = (FAXPP_ParserEnv*)userData; + // Change the general entities ent = env->general_entities; while(ent) { p_text_change_buffer(buffer, newBuffer, &ent->name); + p_text_change_buffer(buffer, newBuffer, &ent->base_uri); entv = ent->value; while(entv) { @@ -413,9 +479,11 @@ ent = ent->next; } + // Change the parameter entities ent = env->parameter_entities; while(ent) { p_text_change_buffer(buffer, newBuffer, &ent->name); + p_text_change_buffer(buffer, newBuffer, &ent->base_uri); entv = ent->value; while(entv) { @@ -425,6 +493,13 @@ ent = ent->next; } + + tokenizer = env->tenv; + while(tokenizer) { + p_text_change_buffer(buffer, newBuffer, &tokenizer->base_uri); + + 
tokenizer = tokenizer->prev; + } } #define p_move_text_to_buffer(env, text, buf) \ @@ -457,8 +532,8 @@ p_move_text_to_buffer(env, &env->event.version, &env->event_buffer); p_move_text_to_buffer(env, &env->event.encoding, &env->event_buffer); p_move_text_to_buffer(env, &env->event.standalone, &env->event_buffer); - p_move_text_to_buffer(env, &env->event.system, &env->event_buffer); - p_move_text_to_buffer(env, &env->event.public, &env->event_buffer); + p_move_text_to_buffer(env, &env->event.system_id, &env->event_buffer); + p_move_text_to_buffer(env, &env->event.public_id, &env->event_buffer); for(i = 0; i < env->event.attr_count; ++i) { p_move_text_to_buffer(env, &env->event.attrs[i].prefix, &env->event_buffer); @@ -539,9 +614,9 @@ if((err) == PREMATURE_END_OF_BUFFER) { \ (env)->event.type = NO_EVENT; \ (err) = p_read_more((env)); \ - if((err) != NO_ERROR) goto error; \ - if((env)->event.type != NO_EVENT) return NO_ERROR; \ - } else goto error; \ + if((err) == NO_ERROR) return NO_ERROR; \ + } \ + goto error; \ } \ } @@ -835,12 +910,14 @@ env->event.standalone.ptr = 0; env->event.standalone.len = 0; - env->event.system.ptr = 0; - env->event.system.len = 0; - env->event.public.ptr = 0; - env->event.public.len = 0; + env->event.system_id.ptr = 0; + env->event.system_id.len = 0; + env->event.public_id.ptr = 0; + env->event.public_id.len = 0; env->event.line = 0; + + env->event_entity = 0; } static FAXPP_Error p_read_more(FAXPP_ParserEnv *env) @@ -872,22 +949,25 @@ return PREMATURE_END_OF_BUFFER; // Output the ENTITY_REFERENCE_END_EVENT - if(env->tenv->no_pass_on_state) { - if(env->tenv->attr_entity && env->tenv->entity) { + if(env->tenv->start_of_entity) { + if(env->tenv->attr_entity) { err = p_set_attr_value_name_from_entity(env->current_attr, env, ENTITY_REFERENCE_END_EVENT, env->tenv->entity); if(err) return err; } - else if(env->tenv->element_entity || env->tenv->external_parsed_entity) { + else if(env->tenv->element_entity) { p_reset_event(env); - 
if(env->tenv->entity) { - p_set_text_from_text(&env->event.name, &env->tenv->entity->name); - p_set_location(&env->event, env->tenv->entity); - env->event.type = ENTITY_REFERENCE_END_EVENT; - } - else { - env->event.type = END_EXTERNAL_ENTITY_EVENT; - } + p_set_text_from_text(&env->event.name, &env->tenv->entity->name); + p_set_location(&env->event, env->tenv->entity); + env->event.type = ENTITY_REFERENCE_END_EVENT; } + else if(env->tenv->external_parsed_entity) { + p_reset_event(env); + env->event.type = END_EXTERNAL_ENTITY_EVENT; + } + else if(env->tenv->external_subset) { + // Go back to parsing the document content + env->next_event = env->main_next_event; + } } // Also checks that the tokenizer is in the correct state @@ -960,11 +1040,18 @@ p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0); break; case XML_DECL_END_TOKEN: - env->next_event = nc_unsupported_encoding_next_event; - if(env->tenv->external_parsed_entity) - env->event.type = START_EXTERNAL_ENTITY_EVENT; - else env->event.type = START_DOCUMENT_EVENT; + if(env->tenv->external_subset) { + // TBD event for start of external subset - jpcs + env->next_event = nc_dtd_next_event; + } + else { + env->next_event = nc_unsupported_encoding_next_event; + if(env->tenv->external_parsed_entity) + env->event.type = START_EXTERNAL_ENTITY_EVENT; + else env->event.type = START_DOCUMENT_EVENT; + } + // Check the encoding string against our internally supported encodings if(env->tenv->user_provided_decode || env->event.encoding.ptr == 0) { env->next_event = env->main_next_event; @@ -1049,6 +1136,7 @@ } *list = ent; + p_set_text_from_text(&ent->base_uri, FAXPP_get_base_uri(env)); p_copy_text_from_token(&ent->name, env, /*useTokenBuffer*/0); p_set_location_from_token(ent, env); @@ -1093,7 +1181,7 @@ return list; } -static FAXPP_Error p_parse_entity_impl(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state, FAXPP_EntityInfo **initial_entity) +static FAXPP_Error 
p_parse_internal_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state, FAXPP_EntityInfo **initial_entity) { FAXPP_EntityValue *entv; FAXPP_Error err; @@ -1102,7 +1190,7 @@ entv = ent->value; while(entv) { if(entv->entity_ref) { - err = p_parse_entity_impl(env, entv->entity_ref, state, initial_entity); + err = p_parse_internal_entity(env, entv->entity_ref, state, initial_entity); if(err) return err; } else { @@ -1117,7 +1205,7 @@ // Set the entity on the first new tokenizer if(*initial_entity) { - env->tenv->no_pass_on_state = 1; + env->tenv->start_of_entity = 1; env->tenv->entity = *initial_entity; *initial_entity = 0; } @@ -1129,6 +1217,33 @@ return NO_ERROR; } +static FAXPP_Error p_parse_external_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityType type) +{ + FAXPP_EntityValue *entv; + FAXPP_Error err; + + env->event_entity = ent; + + entv = ent->value; + while(entv) { + switch(entv->type) { + case SYSTEM_LITERAL_TOKEN: p_set_text_from_text(&env->event.system_id, &entv->value); break; + case PUBID_LITERAL_TOKEN: p_set_text_from_text(&env->event.public_id, &entv->value); break; + default: break; + } + entv = entv->prev; + } + + err = DONT_PARSE_EXTERNAL_ENTITY; + if(env->external_entity_callback) { + err = env->external_entity_callback(env->external_entity_user_data, env, type, + ent->base_uri.ptr == 0 ? 
0 : &ent->base_uri, + &env->event.system_id, &env->event.public_id); + } + + return err; +} + static FAXPP_Error p_parse_entity(FAXPP_ParserEnv *env, FAXPP_EntityInfo *ent, FAXPP_EntityParseState state) { // Check for a recursive entity @@ -1140,7 +1255,11 @@ tokenizer = tokenizer->prev; } - return p_parse_entity_impl(env, ent, state, &ent); + if(ent->external) { + return p_parse_external_entity(env, ent, state - INTERNAL_DIFF); + } + + return p_parse_internal_entity(env, ent, state, &ent); } static Char32 p_dec_char_ref_value(const FAXPP_Text *text, FAXPP_ParserEnv *env) @@ -1181,7 +1300,7 @@ return result; } -static FAXPP_Error nc_doctype_next_event(FAXPP_ParserEnv *env) +static FAXPP_Error nc_dtd_next_event(FAXPP_ParserEnv *env) { FAXPP_EntityInfo *ent; FAXPP_EntityValue *entv; @@ -1223,7 +1342,7 @@ p_copy_text_from_token(&entv->value, env, /*useTokenBuffer*/0); } else { - p_copy_text_from_token(&env->event.system, env, /*useTokenBuffer*/0); + p_copy_text_from_token(&env->event.system_id, env, /*useTokenBuffer*/0); } break; case PUBID_LITERAL_TOKEN: @@ -1236,7 +1355,7 @@ p_copy_text_from_token(&entv->value, env, /*useTokenBuffer*/0); } else { - p_copy_text_from_token(&env->event.public, env, /*useTokenBuffer*/0); + p_copy_text_from_token(&env->event.public_id, env, /*useTokenBuffer*/0); } break; @@ -1342,15 +1461,17 @@ } if(env->current_entity) { - // TBD - external subset - jpcs - // [WFC: PEs in Internal Subset] - err = ILLEGAL_PARAMETER_ENTITY; - goto error; + if(env->tenv->internal_subset) { + // [WFC: PEs in Internal Subset] + err = ILLEGAL_PARAMETER_ENTITY; + goto error; + } -/* entv = p_add_entity_value(env); */ -/* if(!entv) goto out_of_memory; */ + // TBD What if it's not in an entity value - jpcs + entv = p_add_entity_value(env); + if(!entv) goto out_of_memory; -/* entv->entity_ref = ent; */ + entv->entity_ref = ent; } else { err = p_parse_entity(env, ent, INTERNAL_DTD_ENTITY); @@ -1360,6 +1481,15 @@ case DOCTYPE_END_TOKEN: env->next_event = 
env->main_next_event; + + if(env->external_entity_callback) { + err = env->external_entity_callback(env->external_entity_user_data, env, EXTERNAL_SUBSET_ENTITY, + FAXPP_get_base_uri(env), + &env->event.system_id, &env->event.public_id); + if(err == DONT_PARSE_EXTERNAL_ENTITY) err = 0; + if(err) goto error; + } + env->event.type = DOCTYPE_EVENT; return NO_ERROR; @@ -1404,7 +1534,6 @@ static FAXPP_Error nc_next_event(FAXPP_ParserEnv *env) { FAXPP_EntityInfo *ent; - FAXPP_EntityValue *entv; Char32 ch; FAXPP_Error err = 0; @@ -1417,7 +1546,7 @@ p_reset_event(env); p_set_event_location_from_token(env); env->tenv->buffered_token = 1; - env->next_event = nc_doctype_next_event; + env->next_event = nc_dtd_next_event; return NO_ERROR; case DOCTYPE_END_TOKEN: ... [truncated message content] |
From: <jp...@us...> - 2008-03-03 14:32:33
|
Revision: 39 http://faxpp.svn.sourceforge.net/faxpp/?rev=39&view=rev Author: jpcs Date: 2008-03-03 06:32:32 -0800 (Mon, 03 Mar 2008) Log Message: ----------- Implemented entity replacement, using a stack of tokenizers to parse the different entity values. Implemented external entity resolution and parsing using API methods or a callback function. Fixed a number of bugs. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/configure trunk/faxpp/configure.in trunk/faxpp/docs/Doxyfile.api trunk/faxpp/examples/parser_example.c trunk/faxpp/examples/tokenizer_example.c trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/event.h trunk/faxpp/include/faxpp/parser.h trunk/faxpp/include/faxpp/tokenizer.h trunk/faxpp/include/faxpp/transcode.h trunk/faxpp/src/attr_states.h trunk/faxpp/src/buffer.c trunk/faxpp/src/buffer.h trunk/faxpp/src/char_classes.c trunk/faxpp/src/char_classes.h trunk/faxpp/src/doctype.c trunk/faxpp/src/element_states.h trunk/faxpp/src/error.c trunk/faxpp/src/event.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/transcode.c trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h trunk/faxpp/src/xml_tokenizer.c trunk/faxpp/src/xml_tokenizer.h trunk/faxpp/src/xmldecl.c trunk/faxpp/tests/xmlconf_runner.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/Makefile.am 2008-03-03 14:32:32 UTC (rev 39) @@ -7,7 +7,7 @@ lib_LTLIBRARIES = libfaxpp.la # Library version, see http://sourceware.org/autobook/autobook/autobook_91.html -libfaxpp_la_LDFLAGS = -version-info 1:0:0 -no-undefined +libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined libfaxpp_la_SOURCES = \ src/attr_states.h \ Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38) +++ 
trunk/faxpp/Makefile.in 2008-03-03 14:32:32 UTC (rev 39) @@ -229,7 +229,7 @@ lib_LTLIBRARIES = libfaxpp.la # Library version, see http://sourceware.org/autobook/autobook/autobook_91.html -libfaxpp_la_LDFLAGS = -version-info 1:0:0 -no-undefined +libfaxpp_la_LDFLAGS = -version-info 2:0:0 -no-undefined libfaxpp_la_SOURCES = \ src/attr_states.h \ src/buffer.c \ Modified: trunk/faxpp/configure =================================================================== --- trunk/faxpp/configure 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/configure 2008-03-03 14:32:32 UTC (rev 39) @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.61 for faxpp 0.2. +# Generated by GNU Autoconf 2.61 for faxpp 0.3. # # Report bugs to <jo...@sn...>. # @@ -728,8 +728,8 @@ # Identity of this package. PACKAGE_NAME='faxpp' PACKAGE_TARNAME='faxpp' -PACKAGE_VERSION='0.2' -PACKAGE_STRING='faxpp 0.2' +PACKAGE_VERSION='0.3' +PACKAGE_STRING='faxpp 0.3' PACKAGE_BUGREPORT='jo...@sn...' ac_unique_file="include/faxpp/parser.h" @@ -1390,7 +1390,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures faxpp 0.2 to adapt to many kinds of systems. +\`configure' configures faxpp 0.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1460,7 +1460,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of faxpp 0.2:";; + short | recursive ) echo "Configuration of faxpp 0.3:";; esac cat <<\_ACEOF @@ -1564,7 +1564,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -faxpp configure 0.2 +faxpp configure 0.3 generated by GNU Autoconf 2.61 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1578,7 +1578,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by faxpp $as_me 0.2, which was +It was created by faxpp $as_me 0.3, which was generated by GNU Autoconf 2.61. Invocation command line was $ $0 $@ @@ -2268,7 +2268,7 @@ # Define the identity of the package. PACKAGE=faxpp - VERSION=0.2 + VERSION=0.3 # Some tools Automake needs. @@ -21442,7 +21442,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by faxpp $as_me 0.2, which was +This file was extended by faxpp $as_me 0.3, which was generated by GNU Autoconf 2.61. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -21495,7 +21495,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -faxpp config.status 0.2 +faxpp config.status 0.3 configured by $0, generated by GNU Autoconf 2.61, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" Modified: trunk/faxpp/configure.in =================================================================== --- trunk/faxpp/configure.in 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/configure.in 2008-03-03 14:32:32 UTC (rev 39) @@ -2,9 +2,9 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ(2.59) -AC_INIT(faxpp, 0.2, jo...@sn...) +AC_INIT(faxpp, 0.3, jo...@sn...) AC_CONFIG_AUX_DIR(autotools) -AM_INIT_AUTOMAKE(faxpp, 0.2, jo...@sn...) 
+AM_INIT_AUTOMAKE(faxpp, 0.3, jo...@sn...) AC_CONFIG_SRCDIR([include/faxpp/parser.h]) AM_CONFIG_HEADER([src/config.h]) Modified: trunk/faxpp/docs/Doxyfile.api =================================================================== --- trunk/faxpp/docs/Doxyfile.api 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/docs/Doxyfile.api 2008-03-03 14:32:32 UTC (rev 39) @@ -23,7 +23,7 @@ # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = 0.2 +PROJECT_NUMBER = 0.3 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. Modified: trunk/faxpp/examples/parser_example.c =================================================================== --- trunk/faxpp/examples/parser_example.c 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/examples/parser_example.c 2008-03-03 14:32:32 UTC (rev 39) @@ -44,7 +44,31 @@ } } +void +output_escaped_attr_text(const FAXPP_Text *text, FILE *stream) +{ + char *buffer = (char*)text->ptr; + char *buffer_end = buffer + text->len; + + while(buffer < buffer_end) { + if(*buffer == '&') { + fprintf(stream, "&amp;"); + } + else if(*buffer == '<') { + fprintf(stream, "&lt;"); + } + else if(*buffer == '"') { + fprintf(stream, "&quot;"); + } + else { + putc(*buffer, stream); + } + ++buffer; + } +} + #define SHOW_URIS 0 +#define SHOW_ENTITIES 0 void output_event(const FAXPP_Event *event, FILE *stream) @@ -70,6 +94,31 @@ break; case END_DOCUMENT_EVENT: break; + case DOCTYPE_EVENT: + fprintf(stream, "<!DOCTYPE "); + + if(event->prefix.ptr != 0) { + output_text(&event->prefix, stream); + fprintf(stream, ":"); + } + output_text(&event->name, stream); + + if(event->system.ptr != 0) { + if(event->public.ptr != 0) { + fprintf(stream, " PUBLIC \""); + output_text(&event->public, stream); + fprintf(stream, "\" \""); + output_text(&event->system, stream); + fprintf(stream, "\""); + } + else { + fprintf(stream, " SYSTEM \""); + 
output_text(&event->system, stream); + fprintf(stream, "\""); + } + } + fprintf(stream, ">"); + break; case START_ELEMENT_EVENT: case SELF_CLOSING_ELEMENT_EVENT: fprintf(stream, "<"); @@ -106,7 +155,7 @@ while(atval) { switch(atval->type) { case CHARACTERS_EVENT: - output_text(&atval->value, stream); + output_escaped_attr_text(&atval->value, stream); break; case ENTITY_REFERENCE_EVENT: fprintf(stream, "&"); @@ -123,6 +172,18 @@ output_text(&atval->name, stream); fprintf(stream, ";"); break; + case ENTITY_REFERENCE_START_EVENT: +#if SHOW_ENTITIES + fprintf(stream, "&"); + output_text(&atval->name, stream); + fprintf(stream, ";("); +#endif + break; + case ENTITY_REFERENCE_END_EVENT: +#if SHOW_ENTITIES + fprintf(stream, ")"); +#endif + break; default: break; } @@ -193,11 +254,73 @@ output_text(&event->name, stream); fprintf(stream, ";"); break; + case ENTITY_REFERENCE_START_EVENT: +#if SHOW_ENTITIES + fprintf(stream, "&"); + output_text(&event->name, stream); + fprintf(stream, ";("); +#endif + break; + case ENTITY_REFERENCE_END_EVENT: +#if SHOW_ENTITIES + fprintf(stream, ")"); +#endif + break; + case START_EXTERNAL_ENTITY_EVENT: + case END_EXTERNAL_ENTITY_EVENT: case NO_EVENT: break; } } +char *resolve_paths(const char *base, const char *path, unsigned int path_len) +{ + unsigned int base_len = strlen(base); + + char *result = malloc(base_len + path_len + 1); + char *ptr = result; + + strcpy(ptr, base); + ptr += base_len - 1; + + while(ptr >= result && *ptr != '/') { + --ptr; + } + ++ptr; + + strncpy(ptr, path, path_len); + ptr += path_len; + *ptr = 0; + + return result; +} + +static unsigned int file_read_callback(void *userData, void *buffer, unsigned int length) +{ + unsigned int result = fread(buffer, 1, length, (FILE*)userData); + if(result < length) { + fclose((FILE*)userData); + } + return result; +} + +static FAXPP_Error entity_callback(void *userData, FAXPP_Parser *parser, + const FAXPP_Text *system, const FAXPP_Text *public) +{ + FILE *file; + char *path; + 
+ path = resolve_paths((char*)userData, (char*)system->ptr, system->len); + file = fopen(path, "r"); + if(file == 0) { + printf("Open of '%s' failed: %s\n", path, strerror(errno)); + return CANT_LOCATE_EXTERNAL_ENTITY; + } + free(path); + + return FAXPP_parse_external_entity_callback(parser, file_read_callback, file); +} + int main(int argc, char **argv) { @@ -211,7 +334,7 @@ exit(-1); } - FAXPP_Parser *parser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, FAXPP_utf8_encode); + FAXPP_Parser *parser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, FAXPP_utf8_transcoder); if(parser == 0) { printf("ERROR: out of memory\n"); exit(1); @@ -219,15 +342,17 @@ for(i = 1; i < argc; ++i) { + FAXPP_set_external_entity_callback(parser, entity_callback, argv[i]); + startTime = getTime(); file = fopen(argv[i], "r"); if(file == 0) { - printf("Open failed: %s\n", strerror(errno)); + printf("Open of '%s' failed: %s\n", argv[i], strerror(errno)); exit(1); } - err = FAXPP_init_parse_file(parser, file); + err = FAXPP_init_parse_callback(parser, file_read_callback, file); if(err != NO_ERROR) { printf("ERROR: %s\n", FAXPP_err_to_string(err)); exit(1); @@ -245,8 +370,6 @@ FAXPP_get_error_column(parser), FAXPP_err_to_string(err)); } - fclose(file); - printf("Time taken: %gms\n", ((double)(getTime() - startTime) / MSECS_IN_SECS * 1000)); } Modified: trunk/faxpp/examples/tokenizer_example.c =================================================================== --- trunk/faxpp/examples/tokenizer_example.c 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/examples/tokenizer_example.c 2008-03-03 14:32:32 UTC (rev 39) @@ -52,7 +52,7 @@ exit(-1); } - FAXPP_Tokenizer *tokenizer = FAXPP_create_tokenizer(FAXPP_utf8_encode); + FAXPP_Tokenizer *tokenizer = FAXPP_create_tokenizer(FAXPP_utf8_transcoder); if(tokenizer == 0) { printf("ERROR: out of memory\n"); exit(1); @@ -94,7 +94,7 @@ } else length = 0; - length += fread(xml, 1, sizeof(xml) - length, file); + length += fread(xml + length, 1, sizeof(xml) - 
length, file); err = FAXPP_continue_tokenize(tokenizer, xml, length, length != sizeof(xml)); if(err != NO_ERROR) { Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/include/faxpp/error.h 2008-03-03 14:32:32 UTC (rev 39) @@ -57,6 +57,14 @@ INVALID_CHAR_IN_NOTATIONDECL_NAME, INVALID_CHAR_IN_ENTITYDECL_NAME, INVALID_ENTITYDECL, + REFERENCE_TO_UNPARSED_ENTITY, + RECURSIVE_ENTITY, + INCOMPLETE_MARKUP_IN_ENTITY_VALUE, + REFERENCE_TO_EXTERNAL_ENTITY, + ILLEGAL_CHARACTER_REFERENCE, + ILLEGAL_PARAMETER_ENTITY, + CANT_LOCATE_EXTERNAL_ENTITY, + DONT_PARSE_EXTERNAL_ENTITY, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/event.h =================================================================== --- trunk/faxpp/include/faxpp/event.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/include/faxpp/event.h 2008-03-03 14:32:32 UTC (rev 39) @@ -24,6 +24,7 @@ NO_EVENT, START_DOCUMENT_EVENT, END_DOCUMENT_EVENT, + DOCTYPE_EVENT, START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, END_ELEMENT_EVENT, @@ -32,17 +33,26 @@ IGNORABLE_WHITESPACE_EVENT, COMMENT_EVENT, PI_EVENT, - ENTITY_REFERENCE_EVENT, + ENTITY_REFERENCE_EVENT, ///< Reference to an external parsed entity that has not been parsed yet, or a built in entity DEC_CHAR_REFERENCE_EVENT, - HEX_CHAR_REFERENCE_EVENT + HEX_CHAR_REFERENCE_EVENT, + ENTITY_REFERENCE_START_EVENT, ///< Delimits the start of an expanded entity reference + ENTITY_REFERENCE_END_EVENT, ///< Delimits the end of an expanded entity reference + START_EXTERNAL_ENTITY_EVENT, ///< Occurs when an external entity parsed by the user starts + END_EXTERNAL_ENTITY_EVENT ///< Occurs when an external entity parsed by the user ends } FAXPP_EventType; /// An item in a linked list of a FAXPP_Attribute object's value typedef struct FAXPP_AttrValue_s { - FAXPP_EventType type; ///< The type of the attribute value. 
Can be CHARACTERS_EVENT, ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT - /// The name of the item in the attribute's value. \details Present for event types ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT + /// The type of the attribute value. \details Can be CHARACTERS_EVENT, ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, HEX_CHAR_REFERENCE_EVENT, + /// ENTITY_REFERENCE_START_EVENT and ENTITY_REFERENCE_END_EVENT + FAXPP_EventType type; + /// The name of the item in the attribute's value. \details Present for event types ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, + /// HEX_CHAR_REFERENCE_EVENT and ENTITY_REFERENCE_START_EVENT FAXPP_Text name; - FAXPP_Text value; ///< The value of the item in the attribute's value. \details Present for all possible event types + /// The value of the item in the attribute's value. \details Present for all possible event types except ENTITY_REFERENCE_START_EVENT and + /// ENTITY_REFERENCE_END_EVENT + FAXPP_Text value; unsigned int line; ///< The line the attribute value started on unsigned int column; ///< The column the attribute value started on @@ -72,10 +82,10 @@ typedef struct { FAXPP_EventType type; ///< The type of the event - FAXPP_Text prefix; ///< The prefix of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT + FAXPP_Text prefix; ///< The prefix of the event. \details Present for event types DOCTYPE_EVENT, START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT FAXPP_Text uri; ///< The URI of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, and END_ELEMENT_EVENT - /// The name of the event. \details Present for event types START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, END_ELEMENT_EVENT, PI_EVENT, ENTITY_REFERENCE_EVENT, - /// DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT + /// The name of the event. 
\details Present for event types DOCTYPE_EVENT, START_ELEMENT_EVENT, SELF_CLOSING_ELEMENT_EVENT, END_ELEMENT_EVENT, PI_EVENT, + /// ENTITY_REFERENCE_EVENT, DEC_CHAR_REFERENCE_EVENT, and HEX_CHAR_REFERENCE_EVENT FAXPP_Text name; /// The value of the event. \details Present for event types CHARACTERS_EVENT, CDATA_EVENT, IGNORABLE_WHITESPACE_EVENT, COMMENT_EVENT, PI_EVENT, @@ -85,10 +95,13 @@ unsigned int attr_count; ///< The number of attributes in the event. \details Present for event types START_ELEMENT_EVENT, and SELF_CLOSING_ELEMENT_EVENT FAXPP_Attribute *attrs; ///< Array of attributes. \details Present for event types START_ELEMENT_EVENT, and SELF_CLOSING_ELEMENT_EVENT - FAXPP_Text version; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type - FAXPP_Text encoding; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type + FAXPP_Text version; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT + FAXPP_Text encoding; ///< The version of the event. \details Present for event types START_DOCUMENT_EVENT and START_EXTERNAL_ENTITY_EVENT FAXPP_Text standalone; ///< The version of the event. \details Present only for the START_DOCUMENT_EVENT event type + FAXPP_Text system; ///< The system literal of the event. \details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT + FAXPP_Text public; ///< The public ID literal of the event. 
\details Present for event types DOCTYPE_EVENT and ENTITY_REFERENCE_EVENT + unsigned int line; ///< The line number of the start of the event unsigned int column; ///< The column number of the start of the event } FAXPP_Event; Modified: trunk/faxpp/include/faxpp/parser.h =================================================================== --- trunk/faxpp/include/faxpp/parser.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/include/faxpp/parser.h 2008-03-03 14:32:32 UTC (rev 39) @@ -26,7 +26,7 @@ /** * \mainpage * - * Faxpp is a small, fast XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. It currently has no DTD support, although it is planned. + * Faxpp is a small, fast and conformant XML pull parser written in C with an API that can return UTF-8 or UTF-16 strings. * * Faxpp is written by John Snelson, and is released under the terms of the Apache Licence v2. * @@ -46,13 +46,14 @@ * \li The output string encoding is the same as the XML document's encoding. * \li The event / token does not cross a buffer boundary when streaming input. * \li The parser is not set to null terminate it's strings. + * \li Attribute values do not need to be normalized. * * Therefore, to maximize the performance from faxpp the following steps can be taken: * * \li Choose not to null terminate output strings. * \li Choose an output string encoding that is the same as most of the input XML documents that will be parsed. - * \li Stream XML documents using a large a buffer as possible. - * \li Choose to always recieve the output strings in the same encoding as the document, by setting FAXPP_set_encode() to 0. + * \li Stream XML documents using as large a buffer as possible. + * \li Turn off attribute value normalization by setting FAXPP_set_normalize_attrs() to 0 (this makes the parser non-conformant). 
* * \section Downloads * @@ -103,16 +104,33 @@ typedef unsigned int (*FAXPP_ReadCallback)(void *userData, void *buffer, unsigned int length); /** + * The function called when faxpp finds a reference to an external parsed entity. The function should + * lcoate the entity using it's system and public indentifiers and call FAXPP_parse_external_entity(), + * FAXPP_parse_external_entity_callback() or FAXPP_parse_external_entity_file() to parse the external + * entity. + * + * \param userData The user data supplied to the FAXPP_set_external_entity_callback() method + * \param parser A pointer to the parser + * \param system The entity's system identifier + * \param public The entity's public identifier + * + * \return NO_ERROR on success, DONT_PARSE_EXTERNAL_ENTITY to return an unexpanded ENTITY_REFERENCE_EVENT + * event, otherwise another error code to halt parsing (most probably CANT_LOCATE_EXTERNAL_ENTITY). + */ +typedef FAXPP_Error (*FAXPP_ExternalEntityCallback)(void *userData, FAXPP_Parser *parser, + const FAXPP_Text *system, const FAXPP_Text *public); + +/** * Creates a parser object * * \param mode The type of checks the parser should perform - * \param encode The encoding function to use when encoding event values + * \param encode The transcoder to use when encoding event values * * \return A pointer to the parser object, or 0 if out of memory. * * \relatesalso FAXPP_Parser */ -FAXPP_Parser *FAXPP_create_parser(FAXPP_ParseMode mode, FAXPP_EncodeFunction encode); +FAXPP_Parser *FAXPP_create_parser(FAXPP_ParseMode mode, FAXPP_Transcoder encode); /** * Frees a parser object @@ -164,16 +182,16 @@ void FAXPP_set_normalize_attrs(FAXPP_Parser *parser, unsigned int boolean); /** - * Sets the encoding that the parser will use when encoding event values. + * Sets the transcoder that the parser will use when encoding event values. * * Setting this parameter whilst a parse is in progress has undefined results. 
* * \param parser - * \param encode The encoding function to use when encoding event values + * \param encode The transcoder to use when encoding event values * * \relatesalso FAXPP_Parser */ -void FAXPP_set_encode(FAXPP_Parser *parser, FAXPP_EncodeFunction encode); +void FAXPP_set_encode(FAXPP_Parser *parser, FAXPP_Transcoder encode); /** * Returns the current FAXPP_DecodeFunction that the parser is using. @@ -203,6 +221,18 @@ void FAXPP_set_decode(FAXPP_Parser *parser, FAXPP_DecodeFunction decode); /** + * Sets the FAXPP_ExternalEntityCallback that the parser will call when it + * encounters a reference to an external parsed entity. + * + * \param parser + * \param callback The callback function + * \param userData The usuer data passed when the function is called + * + * \relatesalso FAXPP_Parser + */ +void FAXPP_set_external_entity_callback(FAXPP_Parser *parser, FAXPP_ExternalEntityCallback callback, void *userData); + +/** * Initialize the parser to parse the given buffer. This will halt any * parse that was already in progress. * @@ -229,7 +259,7 @@ * parse that was already in progress. * * The file provided must remain valid during the time that the parser is using it. - * The user remains responsible for closing the file after parsing has ended.. + * The user remains responsible for closing the file after parsing has ended. * * \param parser The parser to initialize * \param file The file descriptor of the file to parse @@ -261,6 +291,73 @@ FAXPP_Error FAXPP_init_parse_callback(FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData); /** + * Interrupts parsing to parse the external entity in the given buffer. Any parsing + * that was previously underway will continue when the external entity has been parsed. + * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a + * non-null public or system identifier, in order to parse the external entity it points + * to. 
+ * + * The buffer provided must remain valid and unchanged during the time that + * the parser is using it, since a copy of it is \e not made. The user remains + * responsible for deleting the buffer. + * + * \param parser The parser to use + * \param buffer A pointer to the start of the buffer to parse + * \param length The length of the given buffer + * \param done Set to non-zero if this is the last buffer from the external entity + * + * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize + * the encoding of the buffer + * \retval OUT_OF_MEMORY + * \retval NO_ERROR + * + * \relatesalso FAXPP_Parser + */ +FAXPP_Error FAXPP_parse_external_entity(FAXPP_Parser *parser, void *buffer, unsigned int length, unsigned int done); + +/** + * Interrupts parsing to parse the external entity from the given file. Any parsing + * that was previously underway will continue when the external entity has been parsed. + * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a + * non-null public or system identifier, in order to parse the external entity it points + * to. + * + * The file provided must remain valid during the time that the parser is using it. + * The user remains responsible for closing the file after parsing has ended. + * + * \param parser The parser to initialize + * \param file The file descriptor of the file to parse + * + * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize + * the encoding of the buffer + * \retval OUT_OF_MEMORY + * \retval NO_ERROR + * + * \relatesalso FAXPP_Parser + */ +FAXPP_Error FAXPP_parse_external_entity_file(FAXPP_Parser *parser, FILE *file); + +/** + * Interrupts parsing to parse the external entity using the given read callback. Any parsing + * that was previously underway will continue when the external entity has been parsed. 
+ * This method is usually called when an ENTITY_REFERENCE_EVENT is encountered with a + * non-null public or system identifier, in order to parse the external entity it points + * to. + * + * \param parser The parser to initialize + * \param callback The read callback function to use to retrieve the parse input + * \param userData The user data to be passed to the callback function when it is called + * + * \retval UNSUPPORTED_ENCODING If the encoding sniffing algorithm cannot recognize + * the encoding of the buffer + * \retval OUT_OF_MEMORY + * \retval NO_ERROR + * + * \relatesalso FAXPP_Parser + */ +FAXPP_Error FAXPP_parse_external_entity_callback(FAXPP_Parser *parser, FAXPP_ReadCallback callback, void *userData); + +/** * Instructs the parser to release any dependencies it has on it's current buffer. * * This is typically called on recieving a PREMATURE_END_OF_BUFFER error, before Modified: trunk/faxpp/include/faxpp/tokenizer.h =================================================================== --- trunk/faxpp/include/faxpp/tokenizer.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/include/faxpp/tokenizer.h 2008-03-03 14:32:32 UTC (rev 39) @@ -34,12 +34,12 @@ /** * Creates a tokenizer object * - * \param encode The encoding function to use when encoding token values + * \param encode The transcoder to use when encoding token values * \return A pointer to the tokenizer object, or 0 if out of memory. 
* * \relatesalso FAXPP_Tokenizer */ -FAXPP_Tokenizer *FAXPP_create_tokenizer(FAXPP_EncodeFunction encode); +FAXPP_Tokenizer *FAXPP_create_tokenizer(FAXPP_Transcoder encode); /** * Frees a tokenizer object Modified: trunk/faxpp/include/faxpp/transcode.h =================================================================== --- trunk/faxpp/include/faxpp/transcode.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/include/faxpp/transcode.h 2008-03-03 14:32:32 UTC (rev 39) @@ -65,6 +65,30 @@ (void *buffer, void *buffer_end, Char32 ch); /** + * Encapsulates the information needed to both decode and encode + * an encoding. + * + * \see FAXPP_utf8_transcoder, FAXPP_utf16_native_transcoder + */ +typedef struct { + FAXPP_DecodeFunction decode; ///< The decode function + FAXPP_EncodeFunction encode; ///< The encode function +} FAXPP_Transcoder; + +/** + * Transcoder to and from UTF-8 + * + * \see FAXPP_utf8_decode, FAXPP_utf8_encode + */ +const FAXPP_Transcoder FAXPP_utf8_transcoder; +/** + * Transcoder to and from native endian UTF-16 + * + * \see FAXPP_utf16_native_decode, FAXPP_utf16_native_encode + */ +const FAXPP_Transcoder FAXPP_utf16_native_transcoder; + +/** * Returns a string describing the given (built-in) decode function * \param t * \return a string Modified: trunk/faxpp/src/attr_states.h =================================================================== --- trunk/faxpp/src/attr_states.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/src/attr_states.h 2008-03-03 14:32:32 UTC (rev 39) @@ -367,3 +367,61 @@ return NO_ERROR; } +// Special state for parsing the content of an entity reference +// inside an attribute value +FAXPP_Error +PREFIX(attr_value_state_en)(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ATTRIBUTE_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + 
+ READ_CHAR; + + switch(env->current_char) { + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ATTRIBUTE_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '<': + next_char(env); + return INVALID_CHAR_IN_ATTRIBUTE; + LINE_ENDINGS + case '\t': { + if(env->normalize_attrs) { + // Move the token to the buffer, to normalize it + FAXPP_Error err = FAXPP_tokenizer_release_buffer(env, 0); + if(err != NO_ERROR) return err; + env->current_char = ' '; + } + break; + } + default: + DEFAULT_CASE; + + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + Modified: trunk/faxpp/src/buffer.c =================================================================== --- trunk/faxpp/src/buffer.c 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/src/buffer.c 2008-03-03 14:32:32 UTC (rev 39) @@ -36,6 +36,26 @@ if(buffer->buffer) free(buffer->buffer); } +void FAXPP_swap_buffer(FAXPP_Buffer *a, FAXPP_Buffer *b) +{ + void *p; + unsigned int i; + + p = a->buffer; + a->buffer = b->buffer; + b->buffer = p; + + i = a->length; + a->length = b->length; + b->length = i; + + p = a->cursor; + a->cursor = b->cursor; + b->cursor = p; + + // Don't swap the callback or user data +} + FAXPP_Error FAXPP_resize_buffer(FAXPP_Buffer *buffer, unsigned int minSize) { unsigned int newLength = buffer->length << 1; Modified: trunk/faxpp/src/buffer.h =================================================================== --- trunk/faxpp/src/buffer.h 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/src/buffer.h 2008-03-03 14:32:32 UTC (rev 39) @@ -36,6 +36,7 @@ FAXPP_Error FAXPP_init_buffer(FAXPP_Buffer *buffer, unsigned int initialSize, FAXPP_BufferResizeCallback callback, void *userData); void FAXPP_free_buffer(FAXPP_Buffer *buffer); +void FAXPP_swap_buffer(FAXPP_Buffer *a, 
FAXPP_Buffer *b); #define FAXPP_reset_buffer(buf) (buf)->cursor = (buf)->buffer Modified: trunk/faxpp/src/char_classes.c =================================================================== --- trunk/faxpp/src/char_classes.c 2008-02-27 09:44:24 UTC (rev 38) +++ trunk/faxpp/src/char_classes.c 2008-03-03 14:32:32 UTC (rev 39) @@ -42,900 +42,901 @@ // Don't need the region between 0x00 and 0xFF, since we look // them up directly in the table below /* { 0x000000, 0 }, */ -/* { 0x000009, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | WHITESPACE_CHAR }, */ -/* { 0x00000B, 0 }, */ -/* { 0x00000D, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | WHITESPACE_CHAR }, */ -/* { 0x00000E, 0 }, */ -/* { 0x000020, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | WHITESPACE_CHAR }, */ -/* { 0x000021, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00002D, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00002F, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x000030, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00003A, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x000041, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00005B, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00005F, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x000060, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x000061, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00007B, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x00007F, NON_RESTRICTED_CHAR10 }, */ -/* { 0x000085, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x000086, NON_RESTRICTED_CHAR10 }, */ -/* { 0x0000A0, 
NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000B7, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000B8, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000C0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000D7, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000D8, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ -/* { 0x0000F7, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, */ +/* { 0x000001, CHAR11 }, */ +/* { 0x000009, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 | WHITESPACE_CHAR }, */ +/* { 0x00000B, CHAR11 }, */ +/* { 0x00000D, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 | WHITESPACE_CHAR }, */ +/* { 0x00000E, CHAR11 }, */ +/* { 0x000020, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 | WHITESPACE_CHAR }, */ +/* { 0x000021, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00002D, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00002F, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x000030, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00003A, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x000041, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00005B, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00005F, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x000060, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x000061, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 
| NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00007B, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x00007F, NON_RESTRICTED_CHAR10 | CHAR11 }, */ +/* { 0x000085, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x000086, NON_RESTRICTED_CHAR10 | CHAR11 }, */ +/* { 0x0000A0, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000B7, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000B8, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000C0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000D7, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000D8, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ +/* { 0x0000F7, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 | CHAR11 }, */ - { 0x0000F8, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000132, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000134, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00013F, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000141, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000149, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00014A, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00017F, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | 
NON_RESTRICTED_CHAR11 }, - { 0x000180, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001C4, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001CD, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001F1, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001F4, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001F6, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0001FA, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000218, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000250, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0002A9, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0002BB, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0002C2, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0002D0, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0002D2, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000300, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000346, NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000360, NCNAME_CHAR10 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000362, NCNAME_CHAR11 | 
NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000370, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00037E, NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00037F, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000386, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000387, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000388, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00038B, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00038C, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00038D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00038E, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003A2, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003A3, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003CF, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003D0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003D7, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003DA, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003DB, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | 
NON_RESTRICTED_CHAR11 }, - { 0x0003DC, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003DD, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003DE, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003DF, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003E0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003E1, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003E2, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0003F4, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000401, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00040D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00040E, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000450, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000451, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00045D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00045E, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000482, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000483, NCNAME_CHAR10 | NCNAME_START_CHAR11 | 
NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000487, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000490, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004C5, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004C7, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004C9, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004CB, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004CD, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004D0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004EC, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004EE, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004F6, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004F8, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0004FA, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000531, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000557, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000559, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00055A, 
NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000561, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000587, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000591, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005A2, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005A3, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005BA, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005BB, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005BE, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005BF, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005C0, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005C1, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005C3, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005C4, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005C5, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005D0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005EB, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0005F0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | 
NON_RESTRICTED_CHAR11 }, - { 0x0005F3, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000621, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00063B, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000640, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000641, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00064B, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000653, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000660, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00066A, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000670, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000671, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006B8, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006BA, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006BF, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006C0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006CF, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006D0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | 
NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006D4, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006D5, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006D6, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006E5, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006E7, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006E9, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006EA, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006EE, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006F0, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0006FA, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000901, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000904, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000905, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00093A, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00093C, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00093D, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00093E, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | 
NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00094E, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000951, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000955, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000958, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000962, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000964, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000966, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000970, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000981, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000984, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000985, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00098D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x00098F, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000991, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000993, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009A9, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009AA, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | 
NON_RESTRICTED_CHAR11 }, - { 0x0009B1, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009B2, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009B3, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009B6, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009BA, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009BC, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009BD, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009BE, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009C5, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009C7, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009C9, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009CB, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009CE, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009D7, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009D8, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009DC, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009DE, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009DF, NCNAME_START_CHAR10 | NCNAME_CHAR10 | 
NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009E2, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009E4, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009E6, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009F0, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x0009F2, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A02, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A03, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A05, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A0B, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A0F, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A11, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A13, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A29, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A2A, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A31, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A32, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A34, 
NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A35, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A37, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A38, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A3A, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A3C, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A3D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A3E, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A43, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A47, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A49, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A4B, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A4E, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A59, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A5D, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A5E, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A5F, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A66, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | 
NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A72, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A75, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A81, NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A84, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A85, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A8C, NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | NON_RESTRICTED_CHAR11 }, - { 0x000A8D, NCNAME_START_CHAR10 | NCNAME_CHAR10 | NCNAME_START_CHAR11 | NCNAME_CHAR11 | NON_RESTRICTED_CHAR10 | ... [truncated message content] |
From: <jp...@us...> - 2008-02-27 09:44:20
|
Revision: 38 http://faxpp.svn.sourceforge.net/faxpp/?rev=38&view=rev Author: jpcs Date: 2008-02-27 01:44:24 -0800 (Wed, 27 Feb 2008) Log Message: ----------- Added code to tokenize entity declarations. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/attlistdecl.c trunk/faxpp/src/doctype.c trunk/faxpp/src/elementdecl.c trunk/faxpp/src/error.c trunk/faxpp/src/notationdecl.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c trunk/faxpp/src/xml_parser.h Added Paths: ----------- trunk/faxpp/src/entitydecl.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/Makefile.am 2008-02-27 09:44:24 UTC (rev 38) @@ -35,7 +35,8 @@ src/doctype.c \ src/elementdecl.c \ src/attlistdecl.c \ -src/notationdecl.c +src/notationdecl.c \ +src/entitydecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/Makefile.in 2008-02-27 09:44:24 UTC (rev 38) @@ -64,7 +64,7 @@ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \ - notationdecl.lo + notationdecl.lo entitydecl.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -256,7 +256,8 @@ src/doctype.c \ src/elementdecl.c \ src/attlistdecl.c \ -src/notationdecl.c +src/notationdecl.c \ +src/entitydecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c 
tokenizer_example_LDADD = libfaxpp.la @@ -386,6 +387,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/doctype.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/element.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/entitydecl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@ @@ -555,6 +557,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c +entitydecl.lo: src/entitydecl.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT entitydecl.lo -MD -MP -MF $(DEPDIR)/entitydecl.Tpo -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/entitydecl.Tpo $(DEPDIR)/entitydecl.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/entitydecl.c' object='entitydecl.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o entitydecl.lo `test -f 'src/entitydecl.c' || echo '$(srcdir)/'`src/entitydecl.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/include/faxpp/error.h 2008-02-27 09:44:24 UTC (rev 38) @@ -55,6 +55,8 @@ INVALID_CHAR_IN_ELEMENTDECL_NAME, INVALID_CHAR_IN_ATTLISTDECL_NAME, INVALID_CHAR_IN_NOTATIONDECL_NAME, + INVALID_CHAR_IN_ENTITYDECL_NAME, + INVALID_ENTITYDECL, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/include/faxpp/token.h 2008-02-27 09:44:24 UTC (rev 38) @@ -71,6 +71,7 @@ SYSTEM_LITERAL_TOKEN, PUBID_LITERAL_TOKEN, + NDATA_NAME_TOKEN, ELEMENTDECL_PREFIX_TOKEN, ELEMENTDECL_NAME_TOKEN, @@ -83,6 +84,11 @@ NOTATIONDECL_NAME_TOKEN, NOTATIONDECL_CONTENT_TOKEN, + ENTITYDECL_NAME_TOKEN, + ENTITYDECL_VALUE_TOKEN, + ENTITYDECL_END_TOKEN, + PARAMENTITYDECL_NAME_TOKEN, + END_OF_BUFFER_TOKEN = 99 } FAXPP_TokenType; Modified: trunk/faxpp/src/attlistdecl.c =================================================================== --- trunk/faxpp/src/attlistdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/attlistdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -163,6 +163,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(ATTLISTDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/doctype.c 2008-02-27 
09:44:24 UTC (rev 38) @@ -398,6 +398,9 @@ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': case '%': @@ -439,6 +442,9 @@ case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7A: + // 0-9 + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': + case '9': case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':': case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_': case '%': case '\'': @@ -544,7 +550,7 @@ env->state = comment_start_state2; break; case 'E': - env->state = elementdecl_initial_state1; + env->state = elementdecl_or_entitydecl_state; break; case 'A': env->state = attlistdecl_initial_state1; Modified: trunk/faxpp/src/elementdecl.c =================================================================== --- trunk/faxpp/src/elementdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/elementdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -17,6 +17,28 @@ #include "tokenizer_states.h" #include "char_classes.h" +FAXPP_Error +elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case 'L': + env->state = elementdecl_initial_state1; + next_char(env); + break; + case 'N': + env->state = entitydecl_initial_state1; + next_char(env); + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_DTD_DECL; + } + 
return NO_ERROR; +} + #define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ FAXPP_Error \ name(FAXPP_TokenizerEnv *env) \ @@ -37,12 +59,11 @@ return NO_ERROR; \ } -SINGLE_CHAR_STATE(elementdecl_initial_state1, 'L', 0, elementdecl_initial_state2, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state2, 'E', 0, elementdecl_initial_state3, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state3, 'M', 0, elementdecl_initial_state4, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state4, 'E', 0, elementdecl_initial_state5, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state5, 'N', 0, elementdecl_initial_state6, INVALID_DTD_DECL) -SINGLE_CHAR_STATE(elementdecl_initial_state6, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state1, 'E', 0, elementdecl_initial_state2, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state2, 'M', 0, elementdecl_initial_state3, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state3, 'E', 0, elementdecl_initial_state4, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state4, 'N', 0, elementdecl_initial_state5, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(elementdecl_initial_state5, 'T', elementdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env) @@ -149,6 +170,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(ELEMENTDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Added: trunk/faxpp/src/entitydecl.c =================================================================== --- trunk/faxpp/src/entitydecl.c (rev 0) +++ trunk/faxpp/src/entitydecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -0,0 +1,574 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(entitydecl_initial_state1, 'T', 0, entitydecl_initial_state2, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state2, 'I', 0, entitydecl_initial_state3, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state3, 'T', 0, entitydecl_initial_state4, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(entitydecl_initial_state4, 'Y', entitydecl_param_or_general_state, ws_plus_state, INVALID_DTD_DECL) + +FAXPP_Error +entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '%': + env->stored_state = paramentitydecl_name_state1; + env->state = ws_plus_state; + next_char(env); + break; + LINE_ENDINGS + default: + env->state = entitydecl_name_state; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + break; + } + + return NO_ERROR; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +FAXPP_Error +entitydecl_name_state(FAXPP_TokenizerEnv *env) +{ 
+ while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = entitydecl_content_state; + env->state = ws_state; + token_end_position(env); + report_token(ENTITYDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '"': + env->state = entitydecl_value_quot_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case '\'': + env->state = entitydecl_value_apos_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case 'S': + env->stored_state = entitydecl_ws_state; + env->state = system_id_initial_state1; + break; + case 'P': + env->stored_state = entitydecl_ws_state; + env->state = public_id_initial_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ENTITYDECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +entitydecl_value_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = 
parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_value_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_ws_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = entitydecl_ndata_or_end_state;; + next_char(env); + break; + case '>': + env->state = entitydecl_end_state; + break; + default: + env->state = entitydecl_ndata_or_end_state; + return EXPECTING_WHITESPACE; + } + return NO_ERROR; 
+} + +FAXPP_Error +entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + env->state = entitydecl_end_state; + break; + case 'N': + env->state = entitydecl_ndata_state1; + next_char(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + +SINGLE_CHAR_STATE(entitydecl_ndata_state1, 'D', 0, entitydecl_ndata_state2, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state2, 'A', 0, entitydecl_ndata_state3, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state3, 'T', 0, entitydecl_ndata_state4, INVALID_ENTITYDECL) +SINGLE_CHAR_STATE(entitydecl_ndata_state4, 'A', entitydecl_ndata_name_state1, ws_plus_state, INVALID_ENTITYDECL) + +FAXPP_Error +entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = entitydecl_ndata_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_ENTITYDECL; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->state = entitydecl_end_state; + token_end_position(env); + report_token(NDATA_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + case '>': + env->state = entitydecl_end_state; + token_end_position(env); + report_token(NDATA_NAME_TOKEN, env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_ENTITYDECL; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +entitydecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(ENTITYDECL_END_TOKEN, env); + 
next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +FAXPP_Error +paramentitydecl_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = paramentitydecl_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = paramentitydecl_content_state; + env->state = ws_state; + token_end_position(env); + report_token(PARAMENTITYDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_ENTITYDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '"': + env->state = paramentitydecl_value_quot_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case '\'': + env->state = paramentitydecl_value_apos_state; + next_char(env); + token_start_position(env); + return NO_ERROR; + case 'S': + env->stored_state = paramentitydecl_end_state; + env->state = system_id_initial_state1; + break; + case 'P': + env->stored_state = paramentitydecl_end_state; + env->state = public_id_initial_state1; + break; + LINE_ENDINGS + default: + next_char(env); + return INVALID_ENTITYDECL; + } + next_char(env); + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + 
if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '\'': + env->state = paramentitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env) +{ + while(1) { + if(env->position >= env->buffer_end) { + if(env->token.value.ptr) { + token_end_position(env); + if(env->token.value.len != 0) { + report_token(ENTITYDECL_VALUE_TOKEN, env); + return NO_ERROR; + } + } + token_start_position(env); + return PREMATURE_END_OF_BUFFER; + } + + read_char_no_check(env); + + switch(env->current_char) { + case '"': + env->state = paramentitydecl_end_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + return NO_ERROR; + case '&': + store_state(env); + env->state = reference_state; + token_end_position(env); + report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + case '%': + store_state(env); + env->state = parameter_entity_reference_state; + token_end_position(env); + 
report_token(ENTITYDECL_VALUE_TOKEN, env); + next_char(env); + token_start_position(env); + return NO_ERROR; + LINE_ENDINGS + default: + if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) { + next_char(env); + return RESTRICTED_CHAR; + } + break; + } + next_char(env); + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +paramentitydecl_end_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + WHITESPACE: + next_char(env); + break; + case '>': + base_state(env); + report_empty_token(ENTITYDECL_END_TOKEN, env); + next_char(env); + token_start_position(env); + break; + default: + next_char(env); + return INVALID_ENTITYDECL; + } + return NO_ERROR; +} + Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/error.c 2008-02-27 09:44:24 UTC (rev 38) @@ -105,6 +105,10 @@ return "INVALID_CHAR_IN_ATTLISTDECL_NAME"; case INVALID_CHAR_IN_NOTATIONDECL_NAME: return "INVALID_CHAR_IN_NOTATIONDECL_NAME"; + case INVALID_CHAR_IN_ENTITYDECL_NAME: + return "INVALID_CHAR_IN_ENTITYDECL_NAME"; + case INVALID_ENTITYDECL: + return "INVALID_ENTITYDECL"; case NO_ERROR: break; } Modified: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/notationdecl.c 2008-02-27 09:44:24 UTC (rev 38) @@ -98,6 +98,7 @@ switch(env->current_char) { case '>': base_state(env); + token_end_position(env); report_token(NOTATIONDECL_CONTENT_TOKEN, env); break; LINE_ENDINGS Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/token.c 2008-02-27 09:44:24 UTC (rev 38) @@ -96,6 +96,8 @@ return "SYSTEM_LITERAL_TOKEN"; case PUBID_LITERAL_TOKEN: return 
"PUBID_LITERAL_TOKEN"; + case NDATA_NAME_TOKEN: + return "NDATA_NAME_TOKEN"; case ELEMENTDECL_PREFIX_TOKEN: return "ELEMENTDECL_PREFIX_TOKEN"; @@ -116,6 +118,15 @@ case NOTATIONDECL_CONTENT_TOKEN: return "NOTATIONDECL_CONTENT_TOKEN"; + case ENTITYDECL_NAME_TOKEN: + return "ENTITYDECL_NAME_TOKEN"; + case ENTITYDECL_VALUE_TOKEN: + return "ENTITYDECL_VALUE_TOKEN"; + case ENTITYDECL_END_TOKEN: + return "ENTITYDECL_END_TOKEN"; + case PARAMENTITYDECL_NAME_TOKEN: + return "PARAMENTITYDECL_NAME_TOKEN"; + case NO_TOKEN: break; } Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/tokenizer_states.c 2008-02-27 09:44:24 UTC (rev 38) @@ -460,10 +460,10 @@ else if(state == pubid_literal_quot_state) return "pubid_literal_quot_state"; + else if(state == elementdecl_or_entitydecl_state) + return "elementdecl_or_entitydecl_state"; else if(state == elementdecl_initial_state1) return "elementdecl_initial_state1"; - else if(state == elementdecl_initial_state1) - return "elementdecl_initial_state1"; else if(state == elementdecl_initial_state2) return "elementdecl_initial_state2"; else if(state == elementdecl_initial_state3) @@ -472,8 +472,6 @@ return "elementdecl_initial_state4"; else if(state == elementdecl_initial_state5) return "elementdecl_initial_state5"; - else if(state == elementdecl_initial_state6) - return "elementdecl_initial_state6"; else if(state == elementdecl_name_state1) return "elementdecl_name_state1"; else if(state == elementdecl_name_state2) @@ -533,6 +531,59 @@ else if(state == notationdecl_content_state) return "notationdecl_content_state"; + else if(state == entitydecl_initial_state1) + return "entitydecl_initial_state1"; + else if(state == entitydecl_initial_state1) + return "entitydecl_initial_state1"; + else if(state == entitydecl_initial_state2) + return "entitydecl_initial_state2"; + else if(state == 
entitydecl_initial_state3) + return "entitydecl_initial_state3"; + else if(state == entitydecl_initial_state4) + return "entitydecl_initial_state4"; + else if(state == entitydecl_param_or_general_state) + return "entitydecl_param_or_general_state"; + + else if(state == entitydecl_name_state) + return "entitydecl_name_state"; + else if(state == entitydecl_content_state) + return "entitydecl_content_state"; + else if(state == entitydecl_value_apos_state) + return "entitydecl_value_apos_state"; + else if(state == entitydecl_value_quot_state) + return "entitydecl_value_quot_state"; + else if(state == entitydecl_ws_state) + return "entitydecl_ws_state"; + else if(state == entitydecl_ndata_or_end_state) + return "entitydecl_ndata_or_end_state"; + else if(state == entitydecl_ndata_state1) + return "entitydecl_ndata_state1"; + else if(state == entitydecl_ndata_state2) + return "entitydecl_ndata_state2"; + else if(state == entitydecl_ndata_state3) + return "entitydecl_ndata_state3"; + else if(state == entitydecl_ndata_state4) + return "entitydecl_ndata_state4"; + else if(state == entitydecl_ndata_name_state1) + return "entitydecl_ndata_name_state1"; + else if(state == entitydecl_ndata_name_state2) + return "entitydecl_ndata_name_state2"; + else if(state == entitydecl_end_state) + return "entitydecl_end_state"; + + else if(state == paramentitydecl_name_state1) + return "paramentitydecl_name_state1"; + else if(state == paramentitydecl_name_state2) + return "paramentitydecl_name_state2"; + else if(state == paramentitydecl_content_state) + return "paramentitydecl_content_state"; + else if(state == paramentitydecl_value_apos_state) + return "paramentitydecl_value_apos_state"; + else if(state == paramentitydecl_value_quot_state) + return "paramentitydecl_value_quot_state"; + else if(state == paramentitydecl_end_state) + return "paramentitydecl_end_state"; + return "unknown"; } #endif Modified: trunk/faxpp/src/tokenizer_states.h 
=================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/tokenizer_states.h 2008-02-27 09:44:24 UTC (rev 38) @@ -279,12 +279,12 @@ FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env); FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state3(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state4(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_initial_state5(FAXPP_TokenizerEnv *env); -FAXPP_Error elementdecl_initial_state6(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state1(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error elementdecl_name_seen_colon_state1(FAXPP_TokenizerEnv *env); @@ -314,7 +314,34 @@ FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env); FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_param_or_general_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_name_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_value_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_value_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ws_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_or_end_state(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error 
entitydecl_ndata_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env); + +FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env); +FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env); + + /********************* * * Tokenizer Helper Functions Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/xml_parser.c 2008-02-27 09:44:24 UTC (rev 38) @@ -819,6 +819,7 @@ case DOCTYPE_END_TOKEN: case SYSTEM_LITERAL_TOKEN: case PUBID_LITERAL_TOKEN: + case NDATA_NAME_TOKEN: case PE_REFERENCE_TOKEN: case ELEMENTDECL_PREFIX_TOKEN: case ELEMENTDECL_NAME_TOKEN: @@ -828,6 +829,10 @@ case ATTLISTDECL_CONTENT_TOKEN: case NOTATIONDECL_NAME_TOKEN: case NOTATIONDECL_CONTENT_TOKEN: + case ENTITYDECL_NAME_TOKEN: + case ENTITYDECL_VALUE_TOKEN: + case ENTITYDECL_END_TOKEN: + case PARAMENTITYDECL_NAME_TOKEN: // TBD - jpcs break; Modified: trunk/faxpp/src/xml_parser.h =================================================================== --- trunk/faxpp/src/xml_parser.h 2008-02-25 12:05:13 UTC (rev 37) +++ trunk/faxpp/src/xml_parser.h 2008-02-27 09:44:24 UTC (rev 38) @@ -49,6 +49,23 @@ struct FAXPP_ElementInfo_s *prev; } FAXPP_ElementInfo; +typedef struct FAXPP_EntityValue_s { + FAXPP_TokenType type; + FAXPP_Text value; + + struct FAXPP_EntityValue_s *next; +} 
FAXPP_EntityValue; + +typedef struct FAXPP_EntityInfo_s { + FAXPP_Text name; + + FAXPP_EntityValue value; + + FAXPP_Buffer buffer; + + struct FAXPP_EntityInfo_s *prev; +} FAXPP_EntityInfo; + typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv; typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <jp...@us...> - 2008-02-25 12:05:15
|
Revision: 37 http://faxpp.svn.sourceforge.net/faxpp/?rev=37&view=rev Author: jpcs Date: 2008-02-25 04:05:13 -0800 (Mon, 25 Feb 2008) Log Message: ----------- Implemented tokenization of notation declarations. Modified Paths: -------------- trunk/faxpp/Makefile.am trunk/faxpp/Makefile.in trunk/faxpp/include/faxpp/error.h trunk/faxpp/include/faxpp/token.h trunk/faxpp/src/doctype.c trunk/faxpp/src/error.c trunk/faxpp/src/token.c trunk/faxpp/src/tokenizer_states.c trunk/faxpp/src/tokenizer_states.h trunk/faxpp/src/xml_parser.c Added Paths: ----------- trunk/faxpp/src/notationdecl.c Modified: trunk/faxpp/Makefile.am =================================================================== --- trunk/faxpp/Makefile.am 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/Makefile.am 2008-02-25 12:05:13 UTC (rev 37) @@ -34,7 +34,8 @@ src/xml_tokenizer.h \ src/doctype.c \ src/elementdecl.c \ -src/attlistdecl.c +src/attlistdecl.c \ +src/notationdecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c tokenizer_example_LDADD = libfaxpp.la Modified: trunk/faxpp/Makefile.in =================================================================== --- trunk/faxpp/Makefile.in 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/Makefile.in 2008-02-25 12:05:13 UTC (rev 37) @@ -63,7 +63,8 @@ am_libfaxpp_la_OBJECTS = buffer.lo cdata.lo char_classes.lo comment.lo \ element.lo error.lo event.lo pi.lo reference.lo token.lo \ tokenizer_states.lo transcode.lo xmldecl.lo xml_parser.lo \ - xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo + xml_tokenizer.lo doctype.lo elementdecl.lo attlistdecl.lo \ + notationdecl.lo libfaxpp_la_OBJECTS = $(am_libfaxpp_la_OBJECTS) libfaxpp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -254,7 +255,8 @@ src/xml_tokenizer.h \ src/doctype.c \ src/elementdecl.c \ -src/attlistdecl.c +src/attlistdecl.c \ +src/notationdecl.c tokenizer_example_SOURCES = examples/tokenizer_example.c 
tokenizer_example_LDADD = libfaxpp.la @@ -386,6 +388,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elementdecl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/event.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/notationdecl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parser_example.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reference.Plo@am__quote@ @@ -545,6 +548,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o attlistdecl.lo `test -f 'src/attlistdecl.c' || echo '$(srcdir)/'`src/attlistdecl.c +notationdecl.lo: src/notationdecl.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT notationdecl.lo -MD -MP -MF $(DEPDIR)/notationdecl.Tpo -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c +@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/notationdecl.Tpo $(DEPDIR)/notationdecl.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='src/notationdecl.c' object='notationdecl.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o notationdecl.lo `test -f 'src/notationdecl.c' || echo '$(srcdir)/'`src/notationdecl.c + parser_example.o: examples/parser_example.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parser_example.o -MD -MP -MF $(DEPDIR)/parser_example.Tpo -c -o parser_example.o `test -f 'examples/parser_example.c' || echo '$(srcdir)/'`examples/parser_example.c @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/parser_example.Tpo $(DEPDIR)/parser_example.Po Modified: trunk/faxpp/include/faxpp/error.h =================================================================== --- trunk/faxpp/include/faxpp/error.h 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/include/faxpp/error.h 2008-02-25 12:05:13 UTC (rev 37) @@ -54,6 +54,7 @@ INVALID_DTD_DECL, INVALID_CHAR_IN_ELEMENTDECL_NAME, INVALID_CHAR_IN_ATTLISTDECL_NAME, + INVALID_CHAR_IN_NOTATIONDECL_NAME, OUT_OF_MEMORY, ELEMENT_NAME_MISMATCH, Modified: trunk/faxpp/include/faxpp/token.h =================================================================== --- trunk/faxpp/include/faxpp/token.h 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/include/faxpp/token.h 2008-02-25 12:05:13 UTC (rev 37) @@ -80,6 +80,9 @@ ATTLISTDECL_NAME_TOKEN, ATTLISTDECL_CONTENT_TOKEN, + NOTATIONDECL_NAME_TOKEN, + NOTATIONDECL_CONTENT_TOKEN, + END_OF_BUFFER_TOKEN = 99 } FAXPP_TokenType; Modified: trunk/faxpp/src/doctype.c =================================================================== --- trunk/faxpp/src/doctype.c 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/doctype.c 2008-02-25 12:05:13 UTC (rev 37) @@ -549,6 +549,9 @@ case 'A': env->state = attlistdecl_initial_state1; break; + case 'N': + env->state = notationdecl_initial_state1; + break; LINE_ENDINGS default: env->state = comment_content_state; Modified: trunk/faxpp/src/error.c =================================================================== --- trunk/faxpp/src/error.c 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/error.c 2008-02-25 12:05:13 UTC (rev 37) @@ -103,6 +103,8 @@ return "INVALID_CHAR_IN_ELEMENTDECL_NAME"; case INVALID_CHAR_IN_ATTLISTDECL_NAME: return "INVALID_CHAR_IN_ATTLISTDECL_NAME"; + 
case INVALID_CHAR_IN_NOTATIONDECL_NAME: + return "INVALID_CHAR_IN_NOTATIONDECL_NAME"; case NO_ERROR: break; } Added: trunk/faxpp/src/notationdecl.c =================================================================== --- trunk/faxpp/src/notationdecl.c (rev 0) +++ trunk/faxpp/src/notationdecl.c 2008-02-25 12:05:13 UTC (rev 37) @@ -0,0 +1,110 @@ +/* + * Copyright 2007 Doxological Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "tokenizer_states.h" +#include "char_classes.h" + +#define SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \ +FAXPP_Error \ +name(FAXPP_TokenizerEnv *env) \ +{ \ + read_char(env); \ +\ + switch(env->current_char) { \ + case (ch): \ + if((next_stored_state) != 0) env->stored_state = (next_stored_state); \ + env->state = (next_state); \ + next_char(env); \ + break; \ + LINE_ENDINGS \ + default: \ + next_char(env); \ + return (error); \ + } \ + return NO_ERROR; \ +} + +SINGLE_CHAR_STATE(notationdecl_initial_state1, 'O', 0, notationdecl_initial_state2, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(notationdecl_initial_state2, 'T', 0, notationdecl_initial_state3, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(notationdecl_initial_state3, 'A', 0, notationdecl_initial_state4, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(notationdecl_initial_state4, 'T', 0, notationdecl_initial_state5, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(notationdecl_initial_state5, 'I', 0, notationdecl_initial_state6, INVALID_DTD_DECL) 
+SINGLE_CHAR_STATE(notationdecl_initial_state6, 'O', 0, notationdecl_initial_state7, INVALID_DTD_DECL) +SINGLE_CHAR_STATE(notationdecl_initial_state7, 'N', notationdecl_name_state1, ws_plus_state, INVALID_DTD_DECL) + +FAXPP_Error +notationdecl_name_state1(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + LINE_ENDINGS + default: + env->state = notationdecl_name_state2; + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_start_char) == 0) + return INVALID_CHAR_IN_NOTATIONDECL_NAME; + break; + } + + return NO_ERROR; +} + +FAXPP_Error +notationdecl_name_state2(FAXPP_TokenizerEnv *env) +{ + while(1) { + read_char(env); + + switch(env->current_char) { + WHITESPACE: + env->stored_state = notationdecl_content_state; + env->state = ws_state; + token_end_position(env); + report_token(NOTATIONDECL_NAME_TOKEN, env); + next_char(env); + return NO_ERROR; + default: + break; + } + + next_char(env); + if((FAXPP_char_flags(env->current_char) & env->ncname_char) == 0) + return INVALID_CHAR_IN_NOTATIONDECL_NAME; + } + + // Never happens + return NO_ERROR; +} + +FAXPP_Error +notationdecl_content_state(FAXPP_TokenizerEnv *env) +{ + read_char(env); + + switch(env->current_char) { + case '>': + base_state(env); + report_token(NOTATIONDECL_CONTENT_TOKEN, env); + break; + LINE_ENDINGS + default: + break; + } + next_char(env); + return NO_ERROR; +} + Modified: trunk/faxpp/src/token.c =================================================================== --- trunk/faxpp/src/token.c 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/token.c 2008-02-25 12:05:13 UTC (rev 37) @@ -111,6 +111,11 @@ case ATTLISTDECL_CONTENT_TOKEN: return "ATTLISTDECL_CONTENT_TOKEN"; + case NOTATIONDECL_NAME_TOKEN: + return "NOTATIONDECL_NAME_TOKEN"; + case NOTATIONDECL_CONTENT_TOKEN: + return "NOTATIONDECL_CONTENT_TOKEN"; + case NO_TOKEN: break; } Modified: trunk/faxpp/src/tokenizer_states.c =================================================================== --- 
trunk/faxpp/src/tokenizer_states.c 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/tokenizer_states.c 2008-02-25 12:05:13 UTC (rev 37) @@ -510,6 +510,29 @@ else if(state == attlistdecl_content_state) return "attlistdecl_content_state"; + else if(state == notationdecl_initial_state1) + return "notationdecl_initial_state1"; + else if(state == notationdecl_initial_state1) + return "notationdecl_initial_state1"; + else if(state == notationdecl_initial_state2) + return "notationdecl_initial_state2"; + else if(state == notationdecl_initial_state3) + return "notationdecl_initial_state3"; + else if(state == notationdecl_initial_state4) + return "notationdecl_initial_state4"; + else if(state == notationdecl_initial_state5) + return "notationdecl_initial_state5"; + else if(state == notationdecl_initial_state6) + return "notationdecl_initial_state6"; + else if(state == notationdecl_initial_state7) + return "notationdecl_initial_state7"; + else if(state == notationdecl_name_state1) + return "notationdecl_name_state1"; + else if(state == notationdecl_name_state2) + return "notationdecl_name_state2"; + else if(state == notationdecl_content_state) + return "notationdecl_content_state"; + return "unknown"; } #endif Modified: trunk/faxpp/src/tokenizer_states.h =================================================================== --- trunk/faxpp/src/tokenizer_states.h 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/tokenizer_states.h 2008-02-25 12:05:13 UTC (rev 37) @@ -303,7 +303,18 @@ FAXPP_Error attlistdecl_name_seen_colon_state2(FAXPP_TokenizerEnv *env); FAXPP_Error attlistdecl_content_state(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state3(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state4(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state5(FAXPP_TokenizerEnv *env); +FAXPP_Error 
notationdecl_initial_state6(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_initial_state7(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_name_state1(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_name_state2(FAXPP_TokenizerEnv *env); +FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env); + /********************* * * Tokenizer Helper Functions Modified: trunk/faxpp/src/xml_parser.c =================================================================== --- trunk/faxpp/src/xml_parser.c 2008-02-24 02:26:52 UTC (rev 36) +++ trunk/faxpp/src/xml_parser.c 2008-02-25 12:05:13 UTC (rev 37) @@ -826,6 +826,8 @@ case ATTLISTDECL_PREFIX_TOKEN: case ATTLISTDECL_NAME_TOKEN: case ATTLISTDECL_CONTENT_TOKEN: + case NOTATIONDECL_NAME_TOKEN: + case NOTATIONDECL_CONTENT_TOKEN: // TBD - jpcs break; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |