[Faxpp-devel] SF.net SVN: faxpp: [47] trunk/faxpp
Status: Beta
Brought to you by: jpcs

From: <jp...@us...> - 2008-03-20 01:56:14
Revision: 47
          http://faxpp.svn.sourceforge.net/faxpp/?rev=47&view=rev
Author:   jpcs
Date:     2008-03-19 18:56:21 -0700 (Wed, 19 Mar 2008)
Log Message:
-----------
Added recognition of parameter entities in entity and notation
declarations.
Raise errors for redeclaration of "xml" and "xmlns" namespaces.
Validate the value of "xml:space" attributes.
Modified Paths:
--------------
    trunk/faxpp/Makefile.am
    trunk/faxpp/Makefile.in
    trunk/faxpp/TODO
    trunk/faxpp/include/faxpp/error.h
    trunk/faxpp/src/doctype.c
    trunk/faxpp/src/entitydecl.c
    trunk/faxpp/src/error.c
    trunk/faxpp/src/notationdecl.c
    trunk/faxpp/src/tokenizer_states.c
    trunk/faxpp/src/tokenizer_states.h
    trunk/faxpp/src/xml_parser.c
    trunk/faxpp/tests/xmlconf_runner.c
Added Paths:
-----------
    trunk/faxpp/src/system_public_states.h
Modified: trunk/faxpp/Makefile.am
===================================================================
--- trunk/faxpp/Makefile.am	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/Makefile.am	2008-03-20 01:56:21 UTC (rev 47)
@@ -37,7 +37,8 @@
 src/attlistdecl.c \
 src/notationdecl.c \
 src/entitydecl.c \
-src/conditional.c
+src/conditional.c \
+src/system_public_states.h
 
 tokenizer_example_LDADD = libfaxpp.la
 tokenizer_example_SOURCES = examples/tokenizer_example.c
@@ -46,13 +47,17 @@
 parser_example_SOURCES = \
 examples/parser_example.c \
 examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
 
 xmlconf_runner_LDADD = libfaxpp.la
 xmlconf_runner_SOURCES = \
 tests/xmlconf_runner.c \
 examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
 
 EXTRA_DIST = \
 docs/Doxyfile.api \
Modified: trunk/faxpp/Makefile.in
===================================================================
--- trunk/faxpp/Makefile.in	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/Makefile.in	2008-03-20 01:56:21 UTC (rev 47)
@@ -260,7 +260,8 @@
 src/attlistdecl.c \
 src/notationdecl.c \
 src/entitydecl.c \
-src/conditional.c
+src/conditional.c \
+src/system_public_states.h
 
 tokenizer_example_LDADD = libfaxpp.la
 tokenizer_example_SOURCES = examples/tokenizer_example.c
@@ -268,13 +269,17 @@
 parser_example_SOURCES = \
 examples/parser_example.c \
 examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
 
 xmlconf_runner_LDADD = libfaxpp.la
 xmlconf_runner_SOURCES = \
 tests/xmlconf_runner.c \
 examples/entity_resolver.c \
-examples/output_event.c
+examples/entity_resolver.h \
+examples/output_event.c \
+examples/output_event.h
 
 EXTRA_DIST = \
 docs/Doxyfile.api \
Modified: trunk/faxpp/TODO
===================================================================
--- trunk/faxpp/TODO	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/TODO	2008-03-20 01:56:21 UTC (rev 47)
@@ -1,17 +1,11 @@
 Small tasks
 -----------
 
-Don't accept Namespace 1.1 undefines in XML 1.0 mode
 Normalize line endings in element character content / PI values / comment values
 Accept XML 1.1 line endings as whitespace
-Handle "xml" namespace properly
-xml:space value checking
-Error for redefining "xml" namespace
-Error for defining "xmlns" namespace
-Parse element decls correctly
-Parse parameter entities in markup correctly
 
 Large tasks
 -----------
 
+Attribute default values
 DTD validation
Modified: trunk/faxpp/include/faxpp/error.h
===================================================================
--- trunk/faxpp/include/faxpp/error.h	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/include/faxpp/error.h	2008-03-20 01:56:21 UTC (rev 47)
@@ -75,7 +75,9 @@
 
   ELEMENT_NAME_MISMATCH,
   NO_URI_FOR_PREFIX,
-  DUPLICATE_ATTRIBUTES
+  DUPLICATE_ATTRIBUTES,
+  INVALID_NAMESPACE_DECLARATION,
+  INVALID_XMLSPACE_VALUE
 } FAXPP_Error;
 
 /**
Modified: trunk/faxpp/src/doctype.c
===================================================================
--- trunk/faxpp/src/doctype.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/doctype.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -198,12 +198,10 @@
   WHITESPACE:
     break;
   case 'S':
-    env->stored_state = doctype_internal_subset_start_state;
-    env->state = system_id_initial_state1;
+    env->state = doctype_system_id_initial_state1;
     break;
   case 'P':
-    env->stored_state = doctype_internal_subset_start_state;
-    env->state = public_id_initial_state1;
+    env->state = doctype_public_id_initial_state1;
     break;
   case '[':
     env->state = internal_subset_state;
@@ -223,292 +221,15 @@
   return NO_ERROR;
 }
 
-SINGLE_CHAR_STATE(system_id_initial_state1, 'Y', 0, system_id_initial_state2, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state2, 'S', 0, system_id_initial_state3, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state3, 'T', 0, system_id_initial_state4, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state4, 'E', 0, system_id_initial_state5, INVALID_SYSTEM_ID)
-SINGLE_CHAR_STATE(system_id_initial_state5, 'M', 0, system_id_ws_state, INVALID_SYSTEM_ID)
+#define PREFIX(name) doctype_ ## name
+#define END_STATE doctype_internal_subset_start_state
 
-FAXPP_Error
-system_id_ws_state(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
+#include "system_public_states.h"
 
-  switch(env->current_char) {
-  WHITESPACE:
-    env->state = system_literal_start_state;
-    next_char(env);
-    break;
-  default:
-    env->state = system_literal_start_state;
-    return EXPECTING_WHITESPACE;
-  }
-  return NO_ERROR;
-}
+#undef END_STATE
+#undef PREFIX
 
 FAXPP_Error
-system_literal_start_state(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
-
-  switch(env->current_char) {
-  WHITESPACE:
-    next_char(env);
-    return NO_ERROR;
-  case '"':
-    env->state = system_literal_quot_state;
-    break;
-  case '\'':
-    env->state = system_literal_apos_state;
-    break;
-  default:
-    next_char(env);
-    return EXPECTING_SYSTEM_LITERAL;
-  }
-  next_char(env);
-  token_start_position(env);
-  return NO_ERROR;
-}
-
-FAXPP_Error
-system_literal_apos_state(FAXPP_TokenizerEnv *env)
-{
-  while(1) {
-    read_char(env);
-
-    switch(env->current_char) {
-    case '\'':
-      retrieve_state(env);
-      token_end_position(env);
-      report_token(SYSTEM_LITERAL_TOKEN, env);
-      next_char(env);
-      return NO_ERROR;
-    LINE_ENDINGS
-    default:
-      if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
-        next_char(env);
-        return RESTRICTED_CHAR;
-      }
-      break;
-    }
-    next_char(env);
-  }
-
-  // Never happens
-  return NO_ERROR;
-}
-
-FAXPP_Error
-system_literal_quot_state(FAXPP_TokenizerEnv *env)
-{
-  while(1) {
-    read_char(env);
-
-    switch(env->current_char) {
-    case '"':
-      retrieve_state(env);
-      token_end_position(env);
-      report_token(SYSTEM_LITERAL_TOKEN, env);
-      next_char(env);
-      return NO_ERROR;
-    LINE_ENDINGS
-    default:
-      if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
-        next_char(env);
-        return RESTRICTED_CHAR;
-      }
-      break;
-    }
-    next_char(env);
-  }
-
-  // Never happens
-  return NO_ERROR;
-}
-
-SINGLE_CHAR_STATE(public_id_initial_state1, 'U', 0, public_id_initial_state2, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state2, 'B', 0, public_id_initial_state3, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state3, 'L', 0, public_id_initial_state4, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state4, 'I', 0, public_id_initial_state5, INVALID_PUBLIC_ID)
-SINGLE_CHAR_STATE(public_id_initial_state5, 'C', 0, public_id_ws_state, INVALID_PUBLIC_ID)
-
-FAXPP_Error
-public_id_ws_state(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
-
-  switch(env->current_char) {
-  WHITESPACE:
-    env->state = pubid_literal_start_state;
-    next_char(env);
-    break;
-  default:
-    env->state = pubid_literal_start_state;
-    return EXPECTING_WHITESPACE;
-  }
-  return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_start_state(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
-
-  switch(env->current_char) {
-  WHITESPACE:
-    next_char(env);
-    return NO_ERROR;
-  case '"':
-    env->state = pubid_literal_quot_state;
-    break;
-  case '\'':
-    env->state = pubid_literal_apos_state;
-    break;
-  default:
-    next_char(env);
-    return EXPECTING_PUBID_LITERAL;
-  }
-  next_char(env);
-  token_start_position(env);
-  return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_apos_state(FAXPP_TokenizerEnv *env)
-{
-  while(1) {
-    read_char(env);
-
-    switch(env->current_char) {
-    case '\'':
-      env->state = public_id_ws_state2;
-      token_end_position(env);
-      report_token(PUBID_LITERAL_TOKEN, env);
-      next_char(env);
-      return NO_ERROR;
-    // [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
-    LINE_ENDINGS
-    // A-Z
-               case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
-    case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
-    case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
-    case 0x58: case 0x59: case 0x5A:
-    // a-z
-               case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
-    case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
-    case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
-    case 0x78: case 0x79: case 0x7A:
-    // 0-9
-    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
-    case '9': 
-    case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
-    case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
-    case '%':
-      // Valid PubidChar
-      break;
-    default:
-      next_char(env);
-      return INVALID_CHAR_IN_PUBID_LITERAL;
-    }
-    next_char(env);
-  }
-
-  // Never happens
-  return NO_ERROR;
-}
-
-FAXPP_Error
-pubid_literal_quot_state(FAXPP_TokenizerEnv *env)
-{
-  while(1) {
-    read_char(env);
-
-    switch(env->current_char) {
-    case '"':
-      env->state = public_id_ws_state2;
-      token_end_position(env);
-      report_token(PUBID_LITERAL_TOKEN, env);
-      next_char(env);
-      return NO_ERROR;
-    // [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
-    LINE_ENDINGS
-    // A-Z
-               case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
-    case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
-    case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
-    case 0x58: case 0x59: case 0x5A:
-    // a-z
-               case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
-    case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
-    case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
-    case 0x78: case 0x79: case 0x7A:
-    // 0-9
-    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
-    case '9': 
-    case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
-    case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
-    case '%': case '\'':
-      // Valid PubidChar
-      break;
-    default:
-      next_char(env);
-      return INVALID_CHAR_IN_PUBID_LITERAL;
-    }
-    next_char(env);
-  }
-
-  // Never happens
-  return NO_ERROR;
-}
-
-FAXPP_Error
-public_id_ws_state2(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
-
-  switch(env->current_char) {
-  WHITESPACE:
-    env->state = public_id_ws_state3;
-    next_char(env);
-    break;
-  case '>':
-    if(env->stored_state == notationdecl_end_state) {
-      // Notation decls can skip the system literal
-      retrieve_state(env);
-      return NO_ERROR;
-    }
-    // Fall through
-  default:
-    env->state = system_literal_start_state;
-    return EXPECTING_WHITESPACE;
-  }
-  return NO_ERROR;
-}
-
-FAXPP_Error
-public_id_ws_state3(FAXPP_TokenizerEnv *env)
-{
-  read_char(env);
-
-  switch(env->current_char) {
-  WHITESPACE:
-    next_char(env);
-    break;
-  case '>':
-    if(env->stored_state == notationdecl_end_state) {
-      // Notation decls can skip the system literal
-      retrieve_state(env);
-      return NO_ERROR;
-    }
-    // Fall through
-  default:
-    env->state = system_literal_start_state;
-    break;
-  }
-  return NO_ERROR;
-}
-
-FAXPP_Error
 doctype_internal_subset_start_state(FAXPP_TokenizerEnv *env)
 {
   read_char(env);
Modified: trunk/faxpp/src/entitydecl.c
===================================================================
--- trunk/faxpp/src/entitydecl.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/entitydecl.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -111,12 +111,10 @@
     token_start_position(env);
     return NO_ERROR;
   case 'S':
-    env->stored_state = entitydecl_ws_state;
-    env->state = system_id_initial_state1;
+    env->state = entitydecl_system_id_initial_state1;
     break;
   case 'P':
-    env->stored_state = entitydecl_ws_state;
-    env->state = public_id_initial_state1;
+    env->state = entitydecl_public_id_initial_state1;
     break;
   LINE_ENDINGS
   default:
@@ -127,6 +125,16 @@
   return NO_ERROR;
 }
 
+#define PREFIX(name) entitydecl_ ## name
+#define END_STATE entitydecl_ws_state
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef END_STATE
+#undef PREFIX
+
 FAXPP_Error
 entitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
 {
@@ -422,12 +430,10 @@
     token_start_position(env);
     return NO_ERROR;
   case 'S':
-    env->stored_state = paramentitydecl_end_state;
-    env->state = system_id_initial_state1;
+    env->state = paramentitydecl_system_id_initial_state1;
     break;
   case 'P':
-    env->stored_state = paramentitydecl_end_state;
-    env->state = public_id_initial_state1;
+    env->state = paramentitydecl_public_id_initial_state1;
     break;
   LINE_ENDINGS
   default:
@@ -438,6 +444,16 @@
   return NO_ERROR;
 }
 
+#define PREFIX(name) paramentitydecl_ ## name
+#define END_STATE paramentitydecl_end_state
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef END_STATE
+#undef PREFIX
+
 FAXPP_Error
 paramentitydecl_value_apos_state(FAXPP_TokenizerEnv *env)
 {
Modified: trunk/faxpp/src/error.c
===================================================================
--- trunk/faxpp/src/error.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/error.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -137,6 +137,10 @@
     return "IMPROPER_NESTING_OF_ENTITY";
   case PARAMETER_ENTITY_IN_INTERNAL_SUBSET:
     return "PARAMETER_ENTITY_IN_INTERNAL_SUBSET";
+  case INVALID_NAMESPACE_DECLARATION:
+    return "INVALID_NAMESPACE_DECLARATION";
+  case INVALID_XMLSPACE_VALUE:
+    return "INVALID_XMLSPACE_VALUE";
   case NO_ERROR:
     break;
   }
Modified: trunk/faxpp/src/notationdecl.c
===================================================================
--- trunk/faxpp/src/notationdecl.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/notationdecl.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -97,12 +97,10 @@
 
   switch(env->current_char) {
   case 'S':
-    env->stored_state = notationdecl_end_state;
-    env->state = system_id_initial_state1;
+    env->state = notationdecl_system_id_initial_state1;
     break;
   case 'P':
-    env->stored_state = notationdecl_end_state;
-    env->state = public_id_initial_state1;
+    env->state = notationdecl_public_id_initial_state1;
     break;
   LINE_ENDINGS
   default:
@@ -113,6 +111,18 @@
   return NO_ERROR;
 }
 
+#define PREFIX(name) notationdecl_ ## name
+#define END_STATE notationdecl_end_state
+#define SKIP_SYSTEM_LITERAL
+#define ALLOW_PARAMETER_ENTITIES
+
+#include "system_public_states.h"
+
+#undef ALLOW_PARAMETER_ENTITIES
+#undef SKIP_SYSTEM_LITERAL
+#undef END_STATE
+#undef PREFIX
+
 FAXPP_Error
 notationdecl_end_state(FAXPP_TokenizerEnv *env)
 {
Added: trunk/faxpp/src/system_public_states.h
===================================================================
--- trunk/faxpp/src/system_public_states.h	                        (rev 0)
+++ trunk/faxpp/src/system_public_states.h	2008-03-20 01:56:21 UTC (rev 47)
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2007 Doxological Ltd.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
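+// Macros to define before including this file (the first two are required):
+// PREFIX(name)             - maps a generic state name to the includer's own name
+// END_STATE                - state to continue in once the external ID is tokenized
+// SKIP_SYSTEM_LITERAL      - optional: accept '>' right after the public ID literal
+// ALLOW_PARAMETER_ENTITIES - optional: treat '%' between tokens as a PE reference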
+
+#define SP_SINGLE_CHAR_STATE(name, ch, next_stored_state, next_state, error) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+  read_char(env); \
+\
+  switch(env->current_char) { \
+  case (ch): \
+    if((next_stored_state) != 0) env->stored_state = (next_stored_state); \
+    env->state = (next_state); \
+    next_char(env); \
+    break; \
+  LINE_ENDINGS \
+  default: \
+    next_char(env); \
+    return (error); \
+  } \
+  return NO_ERROR; \
+}
+
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state1), 'Y', 0, PREFIX(system_id_initial_state2), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state2), 'S', 0, PREFIX(system_id_initial_state3), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state3), 'T', 0, PREFIX(system_id_initial_state4), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state4), 'E', 0, PREFIX(system_id_initial_state5), INVALID_SYSTEM_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(system_id_initial_state5), 'M', 0, PREFIX(system_id_ws_state), INVALID_SYSTEM_ID)
+
+FAXPP_Error
+PREFIX(system_id_ws_state)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    env->state = PREFIX(system_literal_start_state);
+    next_char(env);
+    break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+  default:
+    env->state = PREFIX(system_literal_start_state);
+    return EXPECTING_WHITESPACE;
+  }
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_start_state)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    next_char(env);
+    return NO_ERROR;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+  case '"':
+    env->state = PREFIX(system_literal_quot_state);
+    break;
+  case '\'':
+    env->state = PREFIX(system_literal_apos_state);
+    break;
+  default:
+    next_char(env);
+    return EXPECTING_SYSTEM_LITERAL;
+  }
+  next_char(env);
+  token_start_position(env);
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_apos_state)(FAXPP_TokenizerEnv *env)
+{
+  while(1) {
+    read_char(env);
+
+    switch(env->current_char) {
+    case '\'':
+      env->state = END_STATE;
+      token_end_position(env);
+      report_token(SYSTEM_LITERAL_TOKEN, env);
+      next_char(env);
+      return NO_ERROR;
+    case '#':
+      next_char(env);
+      return INVALID_SYSTEM_ID;
+    LINE_ENDINGS
+    default:
+      if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+        next_char(env);
+        return RESTRICTED_CHAR;
+      }
+      break;
+    }
+    next_char(env);
+  }
+
+  // Never happens
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(system_literal_quot_state)(FAXPP_TokenizerEnv *env)
+{
+  while(1) {
+    read_char(env);
+
+    switch(env->current_char) {
+    case '"':
+      env->state = END_STATE;
+      token_end_position(env);
+      report_token(SYSTEM_LITERAL_TOKEN, env);
+      next_char(env);
+      return NO_ERROR;
+    case '#':
+      next_char(env);
+      return INVALID_SYSTEM_ID;
+    LINE_ENDINGS
+    default:
+      if((FAXPP_char_flags(env->current_char) & env->non_restricted_char) == 0) {
+        next_char(env);
+        return RESTRICTED_CHAR;
+      }
+      break;
+    }
+    next_char(env);
+  }
+
+  // Never happens
+  return NO_ERROR;
+}
+
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state1), 'U', 0, PREFIX(public_id_initial_state2), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state2), 'B', 0, PREFIX(public_id_initial_state3), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state3), 'L', 0, PREFIX(public_id_initial_state4), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state4), 'I', 0, PREFIX(public_id_initial_state5), INVALID_PUBLIC_ID)
+SP_SINGLE_CHAR_STATE(PREFIX(public_id_initial_state5), 'C', 0, PREFIX(public_id_ws_state), INVALID_PUBLIC_ID)
+
+FAXPP_Error
+PREFIX(public_id_ws_state)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    env->state = PREFIX(pubid_literal_start_state);
+    next_char(env);
+    break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+  default:
+    env->state = PREFIX(pubid_literal_start_state);
+    return EXPECTING_WHITESPACE;
+  }
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_start_state)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    next_char(env);
+    return NO_ERROR;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+  case '"':
+    env->state = PREFIX(pubid_literal_quot_state);
+    break;
+  case '\'':
+    env->state = PREFIX(pubid_literal_apos_state);
+    break;
+  default:
+    next_char(env);
+    return EXPECTING_PUBID_LITERAL;
+  }
+  next_char(env);
+  token_start_position(env);
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_apos_state)(FAXPP_TokenizerEnv *env)
+{
+  while(1) {
+    read_char(env);
+
+    switch(env->current_char) {
+    case '\'':
+      env->state = PREFIX(public_id_ws_state2);
+      token_end_position(env);
+      report_token(PUBID_LITERAL_TOKEN, env);
+      next_char(env);
+      return NO_ERROR;
+    // [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+    LINE_ENDINGS
+    // A-Z
+               case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+    case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
+    case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+    case 0x58: case 0x59: case 0x5A:
+    // a-z
+               case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+    case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
+    case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+    case 0x78: case 0x79: case 0x7A:
+    // 0-9
+    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+    case '9': 
+    case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
+    case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
+    case '%':
+      // Valid PubidChar
+      break;
+    default:
+      next_char(env);
+      return INVALID_CHAR_IN_PUBID_LITERAL;
+    }
+    next_char(env);
+  }
+
+  // Never happens
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(pubid_literal_quot_state)(FAXPP_TokenizerEnv *env)
+{
+  while(1) {
+    read_char(env);
+
+    switch(env->current_char) {
+    case '"':
+      env->state = PREFIX(public_id_ws_state2);
+      token_end_position(env);
+      report_token(PUBID_LITERAL_TOKEN, env);
+      next_char(env);
+      return NO_ERROR;
+    // [13]   	PubidChar	   ::=   	#x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+    LINE_ENDINGS
+    // A-Z
+               case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
+    case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
+    case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
+    case 0x58: case 0x59: case 0x5A:
+    // a-z
+               case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
+    case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
+    case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
+    case 0x78: case 0x79: case 0x7A:
+    // 0-9
+    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8':
+    case '9': 
+    case ' ': case '-': case '(': case ')': case '+': case ',': case '.': case '/': case ':':
+    case '=': case '?': case ';': case '!': case '*': case '#': case '@': case '$': case '_':
+    case '%': case '\'':
+      // Valid PubidChar
+      break;
+    default:
+      next_char(env);
+      return INVALID_CHAR_IN_PUBID_LITERAL;
+    }
+    next_char(env);
+  }
+
+  // Never happens
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(public_id_ws_state2)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    env->state = PREFIX(public_id_ws_state3);
+    next_char(env);
+    break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+#ifdef SKIP_SYSTEM_LITERAL
+  case '>':
+    // Notation decls can skip the system literal
+    env->state = END_STATE;
+    return NO_ERROR;
+#endif
+  default:
+    env->state = PREFIX(system_literal_start_state);
+    return EXPECTING_WHITESPACE;
+  }
+  return NO_ERROR;
+}
+
+FAXPP_Error
+PREFIX(public_id_ws_state3)(FAXPP_TokenizerEnv *env)
+{
+  read_char(env);
+
+  switch(env->current_char) {
+  WHITESPACE:
+    next_char(env);
+    break;
+#ifdef ALLOW_PARAMETER_ENTITIES
+  case '%':
+    store_state(env);
+    env->state = parameter_entity_reference_in_markup_state;
+    next_char(env);
+    token_start_position(env);
+    if(env->external_subset || env->external_dtd_entity || env->external_in_markup_entity)
+      return NO_ERROR;
+    return PARAMETER_ENTITY_IN_INTERNAL_SUBSET;
+#endif
+#ifdef SKIP_SYSTEM_LITERAL
+  case '>':
+    // Notation decls can skip the system literal
+    env->state = END_STATE;
+    return NO_ERROR;
+#endif
+  default:
+    env->state = PREFIX(system_literal_start_state);
+    break;
+  }
+  return NO_ERROR;
+}
+
+#undef SP_SINGLE_CHAR_STATE
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/tokenizer_states.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -450,47 +450,47 @@
   else if(state == external_subset_decl_state)
     return "external_subset_decl_state";
 
-  else if(state == system_id_initial_state1)
-    return "system_id_initial_state1";
-  else if(state == system_id_initial_state2)
-    return "system_id_initial_state2";
-  else if(state == system_id_initial_state3)
-    return "system_id_initial_state3";
-  else if(state == system_id_initial_state4)
-    return "system_id_initial_state4";
-  else if(state == system_id_initial_state5)
-    return "system_id_initial_state5";
-  else if(state == system_id_ws_state)
-    return "system_id_ws_state";
-  else if(state == system_literal_start_state)
-    return "system_literal_start_state";
-  else if(state == system_literal_apos_state)
-    return "system_literal_apos_state";
-  else if(state == system_literal_quot_state)
-    return "system_literal_quot_state";
+  else if(state == doctype_system_id_initial_state1)
+    return "doctype_system_id_initial_state1";
+  else if(state == doctype_system_id_initial_state2)
+    return "doctype_system_id_initial_state2";
+  else if(state == doctype_system_id_initial_state3)
+    return "doctype_system_id_initial_state3";
+  else if(state == doctype_system_id_initial_state4)
+    return "doctype_system_id_initial_state4";
+  else if(state == doctype_system_id_initial_state5)
+    return "doctype_system_id_initial_state5";
+  else if(state == doctype_system_id_ws_state)
+    return "doctype_system_id_ws_state";
+  else if(state == doctype_system_literal_start_state)
+    return "doctype_system_literal_start_state";
+  else if(state == doctype_system_literal_apos_state)
+    return "doctype_system_literal_apos_state";
+  else if(state == doctype_system_literal_quot_state)
+    return "doctype_system_literal_quot_state";
 
-  else if(state == public_id_initial_state1)
-    return "public_id_initial_state1";
-  else if(state == public_id_initial_state2)
-    return "public_id_initial_state2";
-  else if(state == public_id_initial_state3)
-    return "public_id_initial_state3";
-  else if(state == public_id_initial_state4)
-    return "public_id_initial_state4";
-  else if(state == public_id_initial_state5)
-    return "public_id_initial_state5";
-  else if(state == public_id_ws_state)
-    return "public_id_ws_state";
-  else if(state == pubid_literal_start_state)
-    return "pubid_literal_start_state";
-  else if(state == pubid_literal_apos_state)
-    return "pubid_literal_apos_state";
-  else if(state == pubid_literal_quot_state)
-    return "pubid_literal_quot_state";
-  else if(state == public_id_ws_state2)
-    return "public_id_ws_state2";
-  else if(state == public_id_ws_state3)
-    return "public_id_ws_state3";
+  else if(state == doctype_public_id_initial_state1)
+    return "doctype_public_id_initial_state1";
+  else if(state == doctype_public_id_initial_state2)
+    return "doctype_public_id_initial_state2";
+  else if(state == doctype_public_id_initial_state3)
+    return "doctype_public_id_initial_state3";
+  else if(state == doctype_public_id_initial_state4)
+    return "doctype_public_id_initial_state4";
+  else if(state == doctype_public_id_initial_state5)
+    return "doctype_public_id_initial_state5";
+  else if(state == doctype_public_id_ws_state)
+    return "doctype_public_id_ws_state";
+  else if(state == doctype_pubid_literal_start_state)
+    return "doctype_pubid_literal_start_state";
+  else if(state == doctype_pubid_literal_apos_state)
+    return "doctype_pubid_literal_apos_state";
+  else if(state == doctype_pubid_literal_quot_state)
+    return "doctype_pubid_literal_quot_state";
+  else if(state == doctype_public_id_ws_state2)
+    return "doctype_public_id_ws_state2";
+  else if(state == doctype_public_id_ws_state3)
+    return "doctype_public_id_ws_state3";
 
   else if(state == elementdecl_or_entitydecl_state)
     return "elementdecl_or_entitydecl_state";
@@ -797,6 +797,47 @@
   else if(state == notationdecl_end_state)
     return "notationdecl_end_state";
 
+  else if(state == notationdecl_system_id_initial_state1)
+    return "notationdecl_system_id_initial_state1";
+  else if(state == notationdecl_system_id_initial_state2)
+    return "notationdecl_system_id_initial_state2";
+  else if(state == notationdecl_system_id_initial_state3)
+    return "notationdecl_system_id_initial_state3";
+  else if(state == notationdecl_system_id_initial_state4)
+    return "notationdecl_system_id_initial_state4";
+  else if(state == notationdecl_system_id_initial_state5)
+    return "notationdecl_system_id_initial_state5";
+  else if(state == notationdecl_system_id_ws_state)
+    return "notationdecl_system_id_ws_state";
+  else if(state == notationdecl_system_literal_start_state)
+    return "notationdecl_system_literal_start_state";
+  else if(state == notationdecl_system_literal_apos_state)
+    return "notationdecl_system_literal_apos_state";
+  else if(state == notationdecl_system_literal_quot_state)
+    return "notationdecl_system_literal_quot_state";
+  else if(state == notationdecl_public_id_initial_state1)
+    return "notationdecl_public_id_initial_state1";
+  else if(state == notationdecl_public_id_initial_state2)
+    return "notationdecl_public_id_initial_state2";
+  else if(state == notationdecl_public_id_initial_state3)
+    return "notationdecl_public_id_initial_state3";
+  else if(state == notationdecl_public_id_initial_state4)
+    return "notationdecl_public_id_initial_state4";
+  else if(state == notationdecl_public_id_initial_state5)
+    return "notationdecl_public_id_initial_state5";
+  else if(state == notationdecl_public_id_ws_state)
+    return "notationdecl_public_id_ws_state";
+  else if(state == notationdecl_pubid_literal_start_state)
+    return "notationdecl_pubid_literal_start_state";
+  else if(state == notationdecl_pubid_literal_apos_state)
+    return "notationdecl_pubid_literal_apos_state";
+  else if(state == notationdecl_pubid_literal_quot_state)
+    return "notationdecl_pubid_literal_quot_state";
+  else if(state == notationdecl_public_id_ws_state2)
+    return "notationdecl_public_id_ws_state2";
+  else if(state == notationdecl_public_id_ws_state3)
+    return "notationdecl_public_id_ws_state3";
+
   else if(state == entitydecl_initial_state1)
     return "entitydecl_initial_state1";
   else if(state == entitydecl_initial_state1)
@@ -837,6 +878,47 @@
   else if(state == entitydecl_end_state)
     return "entitydecl_end_state";
 
+  else if(state == entitydecl_system_id_initial_state1)
+    return "entitydecl_system_id_initial_state1";
+  else if(state == entitydecl_system_id_initial_state2)
+    return "entitydecl_system_id_initial_state2";
+  else if(state == entitydecl_system_id_initial_state3)
+    return "entitydecl_system_id_initial_state3";
+  else if(state == entitydecl_system_id_initial_state4)
+    return "entitydecl_system_id_initial_state4";
+  else if(state == entitydecl_system_id_initial_state5)
+    return "entitydecl_system_id_initial_state5";
+  else if(state == entitydecl_system_id_ws_state)
+    return "entitydecl_system_id_ws_state";
+  else if(state == entitydecl_system_literal_start_state)
+    return "entitydecl_system_literal_start_state";
+  else if(state == entitydecl_system_literal_apos_state)
+    return "entitydecl_system_literal_apos_state";
+  else if(state == entitydecl_system_literal_quot_state)
+    return "entitydecl_system_literal_quot_state";
+  else if(state == entitydecl_public_id_initial_state1)
+    return "entitydecl_public_id_initial_state1";
+  else if(state == entitydecl_public_id_initial_state2)
+    return "entitydecl_public_id_initial_state2";
+  else if(state == entitydecl_public_id_initial_state3)
+    return "entitydecl_public_id_initial_state3";
+  else if(state == entitydecl_public_id_initial_state4)
+    return "entitydecl_public_id_initial_state4";
+  else if(state == entitydecl_public_id_initial_state5)
+    return "entitydecl_public_id_initial_state5";
+  else if(state == entitydecl_public_id_ws_state)
+    return "entitydecl_public_id_ws_state";
+  else if(state == entitydecl_pubid_literal_start_state)
+    return "entitydecl_pubid_literal_start_state";
+  else if(state == entitydecl_pubid_literal_apos_state)
+    return "entitydecl_pubid_literal_apos_state";
+  else if(state == entitydecl_pubid_literal_quot_state)
+    return "entitydecl_pubid_literal_quot_state";
+  else if(state == entitydecl_public_id_ws_state2)
+    return "entitydecl_public_id_ws_state2";
+  else if(state == entitydecl_public_id_ws_state3)
+    return "entitydecl_public_id_ws_state3";
+
   else if(state == paramentitydecl_name_state1)
     return "paramentitydecl_name_state1";
   else if(state == paramentitydecl_name_state2)
@@ -850,6 +932,47 @@
   else if(state == paramentitydecl_end_state)
     return "paramentitydecl_end_state";
 
+  else if(state == paramentitydecl_system_id_initial_state1)
+    return "paramentitydecl_system_id_initial_state1";
+  else if(state == paramentitydecl_system_id_initial_state2)
+    return "paramentitydecl_system_id_initial_state2";
+  else if(state == paramentitydecl_system_id_initial_state3)
+    return "paramentitydecl_system_id_initial_state3";
+  else if(state == paramentitydecl_system_id_initial_state4)
+    return "paramentitydecl_system_id_initial_state4";
+  else if(state == paramentitydecl_system_id_initial_state5)
+    return "paramentitydecl_system_id_initial_state5";
+  else if(state == paramentitydecl_system_id_ws_state)
+    return "paramentitydecl_system_id_ws_state";
+  else if(state == paramentitydecl_system_literal_start_state)
+    return "paramentitydecl_system_literal_start_state";
+  else if(state == paramentitydecl_system_literal_apos_state)
+    return "paramentitydecl_system_literal_apos_state";
+  else if(state == paramentitydecl_system_literal_quot_state)
+    return "paramentitydecl_system_literal_quot_state";
+  else if(state == paramentitydecl_public_id_initial_state1)
+    return "paramentitydecl_public_id_initial_state1";
+  else if(state == paramentitydecl_public_id_initial_state2)
+    return "paramentitydecl_public_id_initial_state2";
+  else if(state == paramentitydecl_public_id_initial_state3)
+    return "paramentitydecl_public_id_initial_state3";
+  else if(state == paramentitydecl_public_id_initial_state4)
+    return "paramentitydecl_public_id_initial_state4";
+  else if(state == paramentitydecl_public_id_initial_state5)
+    return "paramentitydecl_public_id_initial_state5";
+  else if(state == paramentitydecl_public_id_ws_state)
+    return "paramentitydecl_public_id_ws_state";
+  else if(state == paramentitydecl_pubid_literal_start_state)
+    return "paramentitydecl_pubid_literal_start_state";
+  else if(state == paramentitydecl_pubid_literal_apos_state)
+    return "paramentitydecl_pubid_literal_apos_state";
+  else if(state == paramentitydecl_pubid_literal_quot_state)
+    return "paramentitydecl_pubid_literal_quot_state";
+  else if(state == paramentitydecl_public_id_ws_state2)
+    return "paramentitydecl_public_id_ws_state2";
+  else if(state == paramentitydecl_public_id_ws_state3)
+    return "paramentitydecl_public_id_ws_state3";
+
   else if(state == conditional_ws_state)
     return "conditional_ws_state";
   else if(state == conditional_state1)
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/tokenizer_states.h	2008-03-20 01:56:21 UTC (rev 47)
@@ -274,27 +274,27 @@
 FAXPP_Error external_subset_markup_state(FAXPP_TokenizerEnv *env);
 FAXPP_Error external_subset_decl_state(FAXPP_TokenizerEnv *env);
 
-FAXPP_Error system_id_initial_state1(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_id_initial_state2(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_id_initial_state3(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_id_initial_state4(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_id_initial_state5(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_id_ws_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_literal_start_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_literal_apos_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error system_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_system_literal_quot_state(FAXPP_TokenizerEnv *env);
 
-FAXPP_Error public_id_initial_state1(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_initial_state2(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_initial_state3(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_initial_state4(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_initial_state5(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_ws_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error pubid_literal_start_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_ws_state2(FAXPP_TokenizerEnv *env);
-FAXPP_Error public_id_ws_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_pubid_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_ws_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error doctype_public_id_ws_state3(FAXPP_TokenizerEnv *env);
 
 FAXPP_Error elementdecl_or_entitydecl_state(FAXPP_TokenizerEnv *env);
 FAXPP_Error elementdecl_initial_state1(FAXPP_TokenizerEnv *env);
@@ -449,6 +449,28 @@
 FAXPP_Error notationdecl_content_state(FAXPP_TokenizerEnv *env);
 FAXPP_Error notationdecl_end_state(FAXPP_TokenizerEnv *env);
 
+FAXPP_Error notationdecl_system_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_system_literal_quot_state(FAXPP_TokenizerEnv *env);
+
+FAXPP_Error notationdecl_public_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_ws_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error notationdecl_public_id_ws_state3(FAXPP_TokenizerEnv *env);
+
 FAXPP_Error entitydecl_initial_state1(FAXPP_TokenizerEnv *env);
 FAXPP_Error entitydecl_initial_state2(FAXPP_TokenizerEnv *env);
 FAXPP_Error entitydecl_initial_state3(FAXPP_TokenizerEnv *env);
@@ -469,6 +491,28 @@
 FAXPP_Error entitydecl_ndata_name_state2(FAXPP_TokenizerEnv *env);
 FAXPP_Error entitydecl_end_state(FAXPP_TokenizerEnv *env);
 
+FAXPP_Error entitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env);
+
+FAXPP_Error entitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error entitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env);
+
 FAXPP_Error paramentitydecl_name_state1(FAXPP_TokenizerEnv *env);
 FAXPP_Error paramentitydecl_name_state2(FAXPP_TokenizerEnv *env);
 FAXPP_Error paramentitydecl_content_state(FAXPP_TokenizerEnv *env);
@@ -476,6 +520,28 @@
 FAXPP_Error paramentitydecl_value_quot_state(FAXPP_TokenizerEnv *env);
 FAXPP_Error paramentitydecl_end_state(FAXPP_TokenizerEnv *env);
 
+FAXPP_Error paramentitydecl_system_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_system_literal_quot_state(FAXPP_TokenizerEnv *env);
+
+FAXPP_Error paramentitydecl_public_id_initial_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_initial_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_initial_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_initial_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_initial_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_ws_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_pubid_literal_start_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_pubid_literal_apos_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_pubid_literal_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_ws_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error paramentitydecl_public_id_ws_state3(FAXPP_TokenizerEnv *env);
+
 FAXPP_Error conditional_ws_state(FAXPP_TokenizerEnv *env);
 FAXPP_Error conditional_state1(FAXPP_TokenizerEnv *env);
 FAXPP_Error conditional_state2(FAXPP_TokenizerEnv *env);
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/src/xml_parser.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -405,7 +405,7 @@
 {
   uri->ptr = 0;
   uri->len = 0;
-  return p_find_ns_info(parser, prefix, uri);
+  return p_find_ns_info_impl(parser, prefix, uri);
 }
 
 unsigned int FAXPP_get_nesting_level(const FAXPP_Parser *parser)
@@ -677,17 +677,6 @@
   } \
 }
 
-#define p_copy_text_from_attr_value(text, attrval, env, buffer) \
-{ \
-  if((attrval)->next == 0) { \
-    p_copy_text_from_event((text), &(attrval)->value, (env), (buffer)); \
-  } \
-  else { \
-    FAXPP_Error err = p_normalize_attr_value((text), (buffer), (attrval), (env)); \
-    if(err != NO_ERROR) return err; \
-  } \
-}
-
 FAXPP_Error p_normalize_attr_value(FAXPP_Text *text, FAXPP_Buffer *buffer, const FAXPP_AttrValue *value, const FAXPP_ParserEnv *env)
 {
   FAXPP_Error err;
@@ -1032,6 +1021,30 @@
   return text_ptr == text_end;
 }
 
+// Returns non-zero only when text consists of exactly the characters of str, each as encoded by encode
+static int p_equals(const char *str, FAXPP_EncodeFunction encode, const FAXPP_Text *text)
+{
+  // No encoding represents a character with as many as 10 bytes
+  uint8_t encode_buffer[10];
+  unsigned int encode_len;
+  // NOTE(review): "text_ptr + text->len" is arithmetic on void*, a compiler extension rather than ISO C
+  void *text_ptr = text->ptr;
+  void *text_end = text_ptr + text->len;
+
+  while(*str != 0) {
+    if(text_ptr >= text_end) return 0;
+    // Encode the next character of str (encode presumably returns the bytes written - confirm) and compare
+    encode_len = encode(encode_buffer, encode_buffer + sizeof(encode_buffer), *str);
+    if((text_end - text_ptr) < encode_len || memcmp(encode_buffer, text_ptr, encode_len) != 0) {
+      return 0;
+    }
+    text_ptr += encode_len;
+    ++str;
+  }
+  // str is used up; the match holds only if text has no bytes left over
+  return text_ptr == text_end;
+}
+
 static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env)
 {
   FAXPP_NextEvent next;
@@ -1048,7 +1061,7 @@
       p_copy_text_from_token(&env->event.version, env, /*useTokenBuffer*/0);
       p_set_event_location_from_token(env);
 
-      if(p_case_insensitive_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) {
+      if(p_equals("1.1", env->tenv->transcoder.encode, &env->event.version)) {
         if(env->xml_version == XML_VERSION_NOT_KNOWN) {
           env->xml_version = XML_VERSION_1_1;
         }
@@ -1069,7 +1082,7 @@
     case XML_DECL_STANDALONE_TOKEN:
       p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0);
 
-      if(p_case_insensitive_equals("YES", env->tenv->transcoder.encode, &env->event.standalone)) {
+      if(p_equals("yes", env->tenv->transcoder.encode, &env->event.standalone)) {
         env->standalone = 1;
       }
       break;
@@ -1605,6 +1618,7 @@
 
         err = p_parse_entity(env, ent, ATTRIBUTE_VALUE_ENTITY);
         if(err) goto error;
+        return NO_ERROR;
       }
       break;
     case PE_REFERENCE_TOKEN:
@@ -1639,6 +1653,7 @@
         p_set_text_from_text(&env->event.public_id, &bkup_public);
 
         if(err) goto error;
+        return NO_ERROR;
       }
       break;
     case PE_REFERENCE_IN_MARKUP_TOKEN:
@@ -1660,7 +1675,7 @@
       p_set_text_from_text(&env->event.public_id, &bkup_public);
 
       if(err) goto error;
-      break;
+      return NO_ERROR;
 
     case ELEMENTDECL_LPAR_TOKEN:
       cs = (FAXPP_ContentSpec*)malloc(sizeof(FAXPP_ContentSpec));
@@ -2045,6 +2060,7 @@
 
         err = p_parse_entity(env, ent, ATTRIBUTE_VALUE_ENTITY);
         if(err) goto error;
+        return NO_ERROR;
 
       } else {
         p_reset_event(env);
@@ -2119,10 +2135,36 @@
   return err;
 }
 
+static const char *xml_prefix = "xml";
+static const char *xmlns_prefix = "xmlns";
+static const char *xml_uri = "http://www.w3.org/XML/1998/namespace";
+static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/";
+
 static FAXPP_Error p_add_ns_info(FAXPP_ParserEnv *env, const FAXPP_Attribute *attr)
 {
-  FAXPP_NamespaceInfo *nsinfo = env->namespace_pool;
+  FAXPP_NamespaceInfo *nsinfo;
 
+  // Check for invalid "xml" or "xmlns" namespace declarations
+  if(attr->prefix.len != 0) {
+    if(p_equals(xmlns_prefix, env->tenv->transcoder.encode, &attr->name))
+      return INVALID_NAMESPACE_DECLARATION;
+    if(p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name) &&
+       !p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value))
+      return INVALID_NAMESPACE_DECLARATION;
+  }
+
+  if((attr->prefix.len == 0 || !p_equals(xml_prefix, env->tenv->transcoder.encode, &attr->name)) &&
+     p_equals(xml_uri, env->tenv->transcoder.encode, &attr->value.value))
+    return INVALID_NAMESPACE_DECLARATION;
+
+  if(p_equals(xmlns_uri, env->tenv->transcoder.encode, &attr->value.value))
+    return INVALID_NAMESPACE_DECLARATION;
+
+  if(env->tenv->xml_char == CHAR10 && attr->prefix.len != 0 && attr->value.value.len == 0)
+    return INVALID_NAMESPACE_DECLARATION;
+
+  // Add the namespace binding
+  nsinfo = env->namespace_pool;
   if(nsinfo == 0) {
     nsinfo = (FAXPP_NamespaceInfo*)malloc(sizeof(FAXPP_NamespaceInfo));
     if(!nsinfo) return OUT_OF_MEMORY;
@@ -2136,7 +2178,7 @@
   env->namespace_stack = nsinfo;
   env->element_info_stack->ns = nsinfo;
 
-  p_copy_text_from_attr_value(&nsinfo->uri, &attr->value, env, &env->element_info_stack->buffer);
+  p_copy_text_from_event(&nsinfo->uri, &attr->value.value, env, &env->element_info_stack->buffer);
 
   if(attr->prefix.len != 0) {
     p_copy_text_from_event(&nsinfo->prefix, &attr->name, env, &env->element_info_stack->buffer);
@@ -2166,6 +2208,10 @@
   }
 
   // The prefix "xml" is always bound to the namespace URI "http://www.w3.org/XML/1998/namespace"
+  if(p_equals(xml_prefix, env->tenv->transcoder.encode, prefix)) {
+    p_copy_text_from_str(uri, &((FAXPP_ParserEnv*)env)->event_buffer, (FAXPP_ParserEnv*)env, xml_uri);
+    return NO_ERROR;
+  }
 
   return NO_URI_FOR_PREFIX;
 }
@@ -2245,9 +2291,6 @@
   env->element_info_pool = einfo;
 }
 
-static const char *xml_uri = "http://www.w3.org/XML/1998/namespace";
-static const char *xmlns_uri = "http://www.w3.org/2000/xmlns/";
-
 static FAXPP_Error wf_next_event(FAXPP_ParserEnv *env)
 {
   int i, j;
@@ -2271,8 +2314,9 @@
       attr = &env->event.attrs[i];
 
       /* Normalize the attribute values if required */
-      if(env->tenv->normalize_attrs &&
-         (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0)) {
+      if(attr->xmlns_attr || attr->xml_attr ||
+         (env->tenv->normalize_attrs &&
+          (attr->value.type != CHARACTERS_EVENT || attr->value.next != 0))) {
         err = p_normalize_attr_value(&tmpText, &env->event_buffer, &attr->value, env);
         if(err != 0) return err;
 
@@ -2287,7 +2331,7 @@
       /* Check for namespace attributes */
       if(attr->xmlns_attr) {
         err = p_add_ns_info(env, attr);
-        if(err != 0) {
+        if(err) {
           set_err_info_from_attr(env, attr);
           return err;
         }
@@ -2313,6 +2357,13 @@
       }
       else if(attr->xml_attr) {
         p_copy_text_from_str(&attr->uri, &env->event_buffer, env, xml_uri);
+
+        if(p_equals("space", env->tenv->transcoder.encode, &attr->name) &&
+           !p_equals("preserve", env->tenv->transcoder.encode, &attr->value.value) &&
+           !p_equals("default", env->tenv->transcoder.encode, &attr->value.value)) {
+          set_err_info_from_attr(env, attr);
+          return INVALID_XMLSPACE_VALUE;
+        }
       }
       else if(attr->prefix.len != 0) {
         err = p_find_ns_info(env, &attr->prefix, &attr->uri);
Modified: trunk/faxpp/tests/xmlconf_runner.c
===================================================================
--- trunk/faxpp/tests/xmlconf_runner.c	2008-03-18 13:35:36 UTC (rev 46)
+++ trunk/faxpp/tests/xmlconf_runner.c	2008-03-20 01:56:21 UTC (rev 47)
@@ -192,10 +192,15 @@
 
       else if(text_equal(event->name, "TEST")) {
         // TBD Check output - jpcs
-/*         if(find_attribute(event, "OUTPUT")) { */
+/*         attr = find_attribute(event, "OUTPUT"); */
+/*         if(attr) { */
+/*           calculateBase(base_buffer, &attr->value, file_buffer); */
+
 /*           printf("^"); */
+/*           printf("\n%s\n", file_buffer); */
 /*           fflush(stdout); */
 /*           ++test_skips; */
+/*           exit(-1); */
 /*           break; */
 /*         } */
 
@@ -213,11 +218,12 @@
           break;
         }
 
+        attr = find_attribute(event, "TYPE");
+
         // Skip "error" type tests at the moment - since they
         // probably need detailed inspection to see which ones
         // ought to pass or fail
         // TBD enable these tests - jpcs
-        attr = find_attribute(event, "TYPE");
         if(text_equal(attr->value.value, "error")) {
           printf("^");
           fflush(stdout);
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
 |