[Faxpp-devel] SF.net SVN: faxpp: [12] trunk/faxpp
Status: Beta
Brought to you by:
jpcs
|
From: <jp...@us...> - 2007-08-11 00:29:44
|
Revision: 12
http://faxpp.svn.sourceforge.net/faxpp/?rev=12&view=rev
Author: jpcs
Date: 2007-08-10 17:29:45 -0700 (Fri, 10 Aug 2007)
Log Message:
-----------
Remove FAXPP_EncodingCallback, and rewrite parser states to allow
setting of parser FAXPP_DecodeFunction after the START_DOCUMENT_EVENT.
Added decoding for ISO-8859-1 (Latin1).
Recognize the strings for the built in encodings in the tokenizer, and
select the correct encoding.
Don't accept colons in PI or entity names since the namespaces spec
forbids that.
Require whitespace between attributes.
Fix handling of namespace 1.1 prefix undefines.
Modified Paths:
--------------
trunk/faxpp/include/faxpp/parser.h
trunk/faxpp/include/faxpp/token.h
trunk/faxpp/include/faxpp/transcode.h
trunk/faxpp/src/attr_states.h
trunk/faxpp/src/element_states.h
trunk/faxpp/src/pi.c
trunk/faxpp/src/reference.c
trunk/faxpp/src/token.c
trunk/faxpp/src/tokenizer_states.c
trunk/faxpp/src/tokenizer_states.h
trunk/faxpp/src/transcode.c
trunk/faxpp/src/xml_parser.c
trunk/faxpp/src/xml_parser.h
trunk/faxpp/src/xml_tokenizer.c
trunk/faxpp/src/xmldecl.c
trunk/faxpp/tests/xmlconf_runner.c
Modified: trunk/faxpp/include/faxpp/parser.h
===================================================================
--- trunk/faxpp/include/faxpp/parser.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/include/faxpp/parser.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -103,22 +103,6 @@
typedef unsigned int (*FAXPP_ReadCallback)(void *userData, void *buffer, unsigned int length);
/**
- * The function called when faxpp reads an encoding declaration in the XML document, or determines
- * that the document does not contain an encoding declaration. The function should return a
- * FAXPP_DecodeFunction for the encoding, or null if the encoding is not supported.
- *
- * \param userData The user data supplied to the FAXPP_set_encoding_callback() method
- * \param encoding The encoding string found, or null if the document does not contain an encoding
- * declaration.
- * \param sniffedEncoding The encoding function already in use by the tokenizer, that was determined
- * using auto-detection when document parsing began.
- *
- * \return The FAXPP_DecodeFunction to use to decode the document, or null if the encoding is not supported
- */
-typedef FAXPP_DecodeFunction (*FAXPP_EncodingCallback)(void *userData, const FAXPP_Text *encoding,
- FAXPP_DecodeFunction sniffedEncoding);
-
-/**
* Creates a parser object
*
* \param mode The type of checks the parser should perform
@@ -170,17 +154,26 @@
void FAXPP_set_encode(FAXPP_Parser *parser, FAXPP_EncodeFunction encode);
/**
- * Sets the encoding callback function that the parser will call when it reads an
- * encoding declaration in the XML document, or determines that the document does
- * not contain an encoding declaration.
+ * Returns the current FAXPP_DecodeFunction that the parser is using.
+ *
+ * \param parser
+ * \return The decode function
*
+ * \relatesalso FAXPP_Parser
+ */
+FAXPP_DecodeFunction FAXPP_get_decode(const FAXPP_Parser *parser);
+
+/**
+ * Sets the FAXPP_DecodeFunction that the parser uses to decode the XML document.
+ * This will typically be called when an encoding declaration is read, to switch to
+ * the correct decode function.
+ *
* \param parser
- * \param callback The encoding callback function to use, or null to use the default encoding callback
- * \param userData The user data to be passed to the callback function when it is called
+ * \param decode The decode function
*
* \relatesalso FAXPP_Parser
*/
-void FAXPP_set_encoding_callback(FAXPP_Parser *parser, FAXPP_EncodingCallback callback, void *userData);
+void FAXPP_set_decode(FAXPP_Parser *parser, FAXPP_DecodeFunction decode);
/**
* Initialize the parser to parse the given buffer. This will halt any
Modified: trunk/faxpp/include/faxpp/token.h
===================================================================
--- trunk/faxpp/include/faxpp/token.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/include/faxpp/token.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -25,6 +25,12 @@
XML_DECL_VERSION_TOKEN,
XML_DECL_ENCODING_TOKEN,
+ XML_DECL_ENCODING_UTF8_TOKEN,
+ XML_DECL_ENCODING_UTF16_TOKEN,
+ XML_DECL_ENCODING_UTF16LE_TOKEN,
+ XML_DECL_ENCODING_UTF16BE_TOKEN,
+ XML_DECL_ENCODING_UCS4_TOKEN,
+ XML_DECL_ENCODING_ISO_8859_1_TOKEN,
XML_DECL_STANDALONE_TOKEN,
START_ELEMENT_PREFIX_TOKEN,
Modified: trunk/faxpp/include/faxpp/transcode.h
===================================================================
--- trunk/faxpp/include/faxpp/transcode.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/include/faxpp/transcode.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -97,6 +97,24 @@
unsigned int FAXPP_utf8_decode(const void *buffer, const void *buffer_end, Char32 *ch);
/**
+ * Decodes a single ISO-8859-1 (Latin1) character from the buffer into its unicode codepoint.
+ *
+ * \param buffer The buffer to decode from
+ * \param buffer_end A pointer to the end of the buffer
+ * \param[out] ch The decoded unicode codepoint
+ *
+ * \return The length of the char, unless it is bigger than TRANSCODE_ERROR,
+ * in which case it is an error code.
+ *
+ * \retval TRANSCODE_PREMATURE_END_OF_BUFFER If there is not enough buffer left to
+ * decode another character
+ * \retval TRANSCODE_BAD_ENCODING If the encoding is invalid
+ *
+ * \see FAXPP_DecodeFunction
+ */
+unsigned int FAXPP_iso_8859_1_decode(const void *buffer, const void *buffer_end, Char32 *ch);
+
+/**
* Decodes a single UTF-16 little endian character from the buffer into it's unicode codepoint.
*
* \param buffer The buffer to decode from
Modified: trunk/faxpp/src/attr_states.h
===================================================================
--- trunk/faxpp/src/attr_states.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/attr_states.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -264,7 +264,7 @@
switch(env->current_char) {
case '\'':
- env->state = PREFIX(start_element_ws_state);
+ env->state = PREFIX(start_element_mandatory_ws_state);
token_end_position(env);
report_token(ATTRIBUTE_VALUE_TOKEN, env);
next_char(env);
@@ -319,7 +319,7 @@
switch(env->current_char) {
case '"':
- env->state = PREFIX(start_element_ws_state);
+ env->state = PREFIX(start_element_mandatory_ws_state);
token_end_position(env);
report_token(ATTRIBUTE_VALUE_TOKEN, env);
next_char(env);
Modified: trunk/faxpp/src/element_states.h
===================================================================
--- trunk/faxpp/src/element_states.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/element_states.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -127,6 +127,28 @@
}
FAXPP_Error
+PREFIX(start_element_mandatory_ws_state)(FAXPP_TokenizerEnv *env)
+{
+ END_CHECK;
+
+ READ_CHAR;
+
+ switch(env->current_char) {
+ WHITESPACE:
+ next_char(env);
+ // Fall through
+ case '/':
+ case '>':
+ env->state = PREFIX(start_element_ws_state);
+ break;
+ default:
+ env->state = PREFIX(start_element_ws_state);
+ return EXPECTING_WHITESPACE;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
PREFIX(start_element_ws_state)(FAXPP_TokenizerEnv *env)
{
END_CHECK;
Modified: trunk/faxpp/src/pi.c
===================================================================
--- trunk/faxpp/src/pi.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/pi.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -32,7 +32,7 @@
default:
env->state = pi_name_state;
next_char(env);
- if((FAXPP_char_flags(env->current_char) & NAME_START_CHAR) == 0) {
+ if((FAXPP_char_flags(env->current_char) & NCNAME_START_CHAR) == 0) {
return INVALID_CHAR_IN_PI_NAME;
}
break;
@@ -113,7 +113,7 @@
token_start_position(env);
return NO_ERROR;
default:
- if((FAXPP_char_flags(env->current_char) & NAME_CHAR) == 0) {
+ if((FAXPP_char_flags(env->current_char) & NCNAME_CHAR) == 0) {
next_char(env);
return INVALID_CHAR_IN_PI_NAME;
}
Modified: trunk/faxpp/src/reference.c
===================================================================
--- trunk/faxpp/src/reference.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/reference.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -53,7 +53,7 @@
env->state = entity_reference_state;
token_start_position(env);
next_char(env);
- if((FAXPP_char_flags(env->current_char) & NAME_START_CHAR) == 0)
+ if((FAXPP_char_flags(env->current_char) & NCNAME_START_CHAR) == 0)
return INVALID_CHAR_IN_ENTITY_REFERENCE;
break;
}
@@ -323,7 +323,7 @@
}
next_char(env);
- if((FAXPP_char_flags(env->current_char) & NAME_CHAR) == 0)
+ if((FAXPP_char_flags(env->current_char) & NCNAME_CHAR) == 0)
return INVALID_CHAR_IN_ENTITY_REFERENCE;
}
Modified: trunk/faxpp/src/token.c
===================================================================
--- trunk/faxpp/src/token.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/token.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -62,6 +62,18 @@
return "XML_DECL_VERSION_TOKEN";
case XML_DECL_ENCODING_TOKEN:
return "XML_DECL_ENCODING_TOKEN";
+ case XML_DECL_ENCODING_UTF8_TOKEN:
+ return "XML_DECL_ENCODING_UTF8_TOKEN";
+ case XML_DECL_ENCODING_UTF16_TOKEN:
+ return "XML_DECL_ENCODING_UTF16_TOKEN";
+ case XML_DECL_ENCODING_UTF16LE_TOKEN:
+ return "XML_DECL_ENCODING_UTF16LE_TOKEN";
+ case XML_DECL_ENCODING_UTF16BE_TOKEN:
+ return "XML_DECL_ENCODING_UTF16BE_TOKEN";
+ case XML_DECL_ENCODING_UCS4_TOKEN:
+ return "XML_DECL_ENCODING_UCS4_TOKEN";
+ case XML_DECL_ENCODING_ISO_8859_1_TOKEN:
+ return "XML_DECL_ENCODING_ISO_8859_1_TOKEN";
case XML_DECL_STANDALONE_TOKEN:
return "XML_DECL_STANDALONE_TOKEN";
case XMLNS_PREFIX_TOKEN:
Modified: trunk/faxpp/src/tokenizer_states.c
===================================================================
--- trunk/faxpp/src/tokenizer_states.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/tokenizer_states.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -61,6 +61,8 @@
return "default_start_element_name_seen_colon_state";
else if(state == default_start_element_name_seen_colon_state2)
return "default_start_element_name_seen_colon_state2";
+ else if(state == default_start_element_mandatory_ws_state)
+ return "default_start_element_mandatory_ws_state";
else if(state == default_start_element_ws_state)
return "default_start_element_ws_state";
else if(state == default_element_content_markup_state)
@@ -110,6 +112,8 @@
return "utf8_start_element_name_seen_colon_state";
else if(state == utf8_start_element_name_seen_colon_state2)
return "utf8_start_element_name_seen_colon_state2";
+ else if(state == utf8_start_element_mandatory_ws_state)
+ return "utf8_start_element_mandatory_ws_state";
else if(state == utf8_start_element_ws_state)
return "utf8_start_element_ws_state";
else if(state == utf8_attr_name_state)
@@ -159,6 +163,8 @@
return "utf16_start_element_name_seen_colon_state";
else if(state == utf16_start_element_name_seen_colon_state2)
return "utf16_start_element_name_seen_colon_state2";
+ else if(state == utf16_start_element_mandatory_ws_state)
+ return "utf16_start_element_mandatory_ws_state";
else if(state == utf16_start_element_ws_state)
return "utf16_start_element_ws_state";
else if(state == utf16_attr_name_state)
@@ -328,16 +334,82 @@
return "xml_decl_encoding_state7";
else if(state == xml_decl_encoding_state8)
return "xml_decl_encoding_state8";
- else if(state == xml_decl_encoding_value_state)
- return "xml_decl_encoding_value_state";
- else if(state == xml_decl_encoding_value_quot_state1)
- return "xml_decl_encoding_value_quot_state1";
- else if(state == xml_decl_encoding_value_quot_state2)
- return "xml_decl_encoding_value_quot_state2";
- else if(state == xml_decl_encoding_value_apos_state1)
- return "xml_decl_encoding_value_apos_state1";
- else if(state == xml_decl_encoding_value_apos_state2)
- return "xml_decl_encoding_value_apos_state2";
+
+ else if(state == xml_decl_encoding_value_state1)
+ return "xml_decl_encoding_value_state1";
+ else if(state == xml_decl_encoding_value_state2)
+ return "xml_decl_encoding_value_state2";
+ else if(state == xml_decl_encoding_value_utf_state1)
+ return "xml_decl_encoding_value_utf_state1";
+ else if(state == xml_decl_encoding_value_utf_state2)
+ return "xml_decl_encoding_value_utf_state2";
+ else if(state == xml_decl_encoding_value_utf_state3)
+ return "xml_decl_encoding_value_utf_state3";
+ else if(state == xml_decl_encoding_value_utf_state4)
+ return "xml_decl_encoding_value_utf_state4";
+ else if(state == xml_decl_encoding_value_utf8_state)
+ return "xml_decl_encoding_value_utf8_state";
+ else if(state == xml_decl_encoding_value_utf16_state1)
+ return "xml_decl_encoding_value_utf16_state1";
+ else if(state == xml_decl_encoding_value_utf16_state2)
+ return "xml_decl_encoding_value_utf16_state2";
+ else if(state == xml_decl_encoding_value_utf16be_state1)
+ return "xml_decl_encoding_value_utf16be_state1";
+ else if(state == xml_decl_encoding_value_utf16be_state2)
+ return "xml_decl_encoding_value_utf16be_state2";
+ else if(state == xml_decl_encoding_value_utf16le_state1)
+ return "xml_decl_encoding_value_utf16le_state1";
+ else if(state == xml_decl_encoding_value_utf16le_state2)
+ return "xml_decl_encoding_value_utf16le_state2";
+ else if(state == xml_decl_encoding_value_iso_state1)
+ return "xml_decl_encoding_value_iso_state1";
+ else if(state == xml_decl_encoding_value_iso_state2)
+ return "xml_decl_encoding_value_iso_state2";
+ else if(state == xml_decl_encoding_value_iso_state3)
+ return "xml_decl_encoding_value_iso_state3";
+ else if(state == xml_decl_encoding_value_iso_state4)
+ return "xml_decl_encoding_value_iso_state4";
+ else if(state == xml_decl_encoding_value_ucs4_state1)
+ return "xml_decl_encoding_value_ucs4_state1";
+ else if(state == xml_decl_encoding_value_ucs4_state2)
+ return "xml_decl_encoding_value_ucs4_state2";
+ else if(state == xml_decl_encoding_value_ucs4_state3)
+ return "xml_decl_encoding_value_ucs4_state3";
+ else if(state == xml_decl_encoding_value_ucs4_state4)
+ return "xml_decl_encoding_value_ucs4_state4";
+ else if(state == xml_decl_encoding_value_ucs4_state5)
+ return "xml_decl_encoding_value_ucs4_state5";
+ else if(state == xml_decl_encoding_value_ucs4_state6)
+ return "xml_decl_encoding_value_ucs4_state6";
+ else if(state == xml_decl_encoding_value_ucs4_state7)
+ return "xml_decl_encoding_value_ucs4_state7";
+ else if(state == xml_decl_encoding_value_ucs4_state8)
+ return "xml_decl_encoding_value_ucs4_state8";
+ else if(state == xml_decl_encoding_value_ucs4_state9)
+ return "xml_decl_encoding_value_ucs4_state9";
+ else if(state == xml_decl_encoding_value_ucs4_state10)
+ return "xml_decl_encoding_value_ucs4_state10";
+ else if(state == xml_decl_encoding_value_ucs4_state11)
+ return "xml_decl_encoding_value_ucs4_state11";
+ else if(state == xml_decl_encoding_value_ucs4_state12)
+ return "xml_decl_encoding_value_ucs4_state12";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state1)
+ return "xml_decl_encoding_value_iso_8859_1_state1";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state2)
+ return "xml_decl_encoding_value_iso_8859_1_state2";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state3)
+ return "xml_decl_encoding_value_iso_8859_1_state3";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state4)
+ return "xml_decl_encoding_value_iso_8859_1_state4";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state5)
+ return "xml_decl_encoding_value_iso_8859_1_state5";
+ else if(state == xml_decl_encoding_value_iso_8859_1_state6)
+ return "xml_decl_encoding_value_iso_8859_1_state6";
+ else if(state == xml_decl_encoding_value_quot_state)
+ return "xml_decl_encoding_value_quot_state";
+ else if(state == xml_decl_encoding_value_apos_state)
+ return "xml_decl_encoding_value_apos_state";
+
else if(state == xml_decl_standalone_ws_state)
return "xml_decl_standalone_ws_state";
else if(state == xml_decl_seen_question_state)
Modified: trunk/faxpp/src/tokenizer_states.h
===================================================================
--- trunk/faxpp/src/tokenizer_states.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/tokenizer_states.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -32,6 +32,7 @@
FAXPP_Error default_start_element_name_state(FAXPP_TokenizerEnv *env);
FAXPP_Error default_start_element_name_seen_colon_state(FAXPP_TokenizerEnv *env);
FAXPP_Error default_start_element_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error default_start_element_mandatory_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error default_start_element_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error default_ns_name_state1(FAXPP_TokenizerEnv *env);
@@ -61,6 +62,7 @@
FAXPP_Error utf8_start_element_name_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf8_start_element_name_seen_colon_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf8_start_element_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error utf8_start_element_mandatory_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf8_start_element_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf8_ns_name_state1(FAXPP_TokenizerEnv *env);
@@ -90,6 +92,7 @@
FAXPP_Error utf16_start_element_name_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf16_start_element_name_seen_colon_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf16_start_element_name_seen_colon_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error utf16_start_element_mandatory_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf16_start_element_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error utf16_ns_name_state1(FAXPP_TokenizerEnv *env);
@@ -209,12 +212,45 @@
FAXPP_Error xml_decl_encoding_state6(FAXPP_TokenizerEnv *env);
FAXPP_Error xml_decl_encoding_state7(FAXPP_TokenizerEnv *env);
FAXPP_Error xml_decl_encoding_state8(FAXPP_TokenizerEnv *env);
-FAXPP_Error xml_decl_encoding_value_state(FAXPP_TokenizerEnv *env);
-FAXPP_Error xml_decl_encoding_value_quot_state1(FAXPP_TokenizerEnv *env);
-FAXPP_Error xml_decl_encoding_value_quot_state2(FAXPP_TokenizerEnv *env);
-FAXPP_Error xml_decl_encoding_value_apos_state1(FAXPP_TokenizerEnv *env);
-FAXPP_Error xml_decl_encoding_value_apos_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf8_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16be_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16be_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16le_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_utf16le_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state7(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state8(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state9(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state10(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state11(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_ucs4_state12(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state1(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state2(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state3(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state4(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state5(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_iso_8859_1_state6(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_quot_state(FAXPP_TokenizerEnv *env);
+FAXPP_Error xml_decl_encoding_value_apos_state(FAXPP_TokenizerEnv *env);
+
FAXPP_Error xml_decl_standalone_ws_state(FAXPP_TokenizerEnv *env);
FAXPP_Error xml_decl_standalone_state1(FAXPP_TokenizerEnv *env);
FAXPP_Error xml_decl_standalone_state2(FAXPP_TokenizerEnv *env);
Modified: trunk/faxpp/src/transcode.c
===================================================================
--- trunk/faxpp/src/transcode.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/transcode.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -20,6 +20,8 @@
{
if(t == FAXPP_utf8_decode)
return "UTF-8";
+ else if(t == FAXPP_iso_8859_1_decode)
+ return "ISO-8859-1";
else if (t == FAXPP_utf16_le_decode)
return "UTF-16LE";
else if (t == FAXPP_utf16_be_decode)
@@ -149,6 +151,15 @@
return len;
}
+// ISO-8859-1 is the first 256 codepoints from Unicode
+unsigned int
+FAXPP_iso_8859_1_decode(const void *buffer, const void *buffer_end,
+ Char32 *ch)
+{
+ *ch = *(uint8_t*)buffer;
+ return 1;
+}
+
/*
* Code range | UTF-16
* --------------+-------------------------------------
Modified: trunk/faxpp/src/xml_parser.c
===================================================================
--- trunk/faxpp/src/xml_parser.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/xml_parser.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -20,6 +20,7 @@
#include "xml_parser.h"
#include "char_classes.h"
#include "tokenizer_states.h"
+#include "config.h"
#define INITIAL_ATTRS_SIZE 4
#define INITIAL_NS_SIZE 6
@@ -36,29 +37,28 @@
static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env);
static FAXPP_Error nc_next_event(FAXPP_ParserEnv *env);
+static FAXPP_Error nc_pi_content_next_event(FAXPP_ParserEnv *env);
+static FAXPP_Error nc_unsupported_encoding_next_event(FAXPP_ParserEnv *env);
-static FAXPP_Error wf_start_document_next_event(FAXPP_ParserEnv *env);
static FAXPP_Error wf_next_event(FAXPP_ParserEnv *env);
-static FAXPP_DecodeFunction p_default_encoding_callback(void *userData, const FAXPP_Text *encoding,
- FAXPP_DecodeFunction sniffedEncoding)
-{
- // TBD implement this - jpcs
- // TBD eliminate the callback functions entirely - jpcs
- return sniffedEncoding;
-}
-
FAXPP_Parser *FAXPP_create_parser(FAXPP_ParseMode mode, FAXPP_EncodeFunction encode)
{
FAXPP_ParserEnv *env = malloc(sizeof(FAXPP_ParserEnv));
memset(env, 0, sizeof(FAXPP_ParserEnv));
- env->mode = mode;
env->encode = encode;
- env->encoding = p_default_encoding_callback;
+ switch(mode) {
+ case NO_CHECKS_PARSE_MODE:
+ env->main_next_event = nc_next_event;
+ break;
+ case WELL_FORMED_PARSE_MODE:
+ env->main_next_event = wf_next_event;
+ break;
+ }
- /* The next_event field is set in p_reset_parser() */
+ env->next_event = nc_start_document_next_event;
env->max_attr_count = INITIAL_ATTRS_SIZE;
env->attrs = (FAXPP_Attribute*)malloc(sizeof(FAXPP_Attribute) * INITIAL_ATTRS_SIZE);
@@ -143,16 +143,18 @@
}
-void FAXPP_set_encoding_callback(FAXPP_Parser *parser, FAXPP_EncodingCallback callback, void *userData)
+FAXPP_DecodeFunction FAXPP_get_decode(const FAXPP_Parser *parser)
{
- if(callback == 0) {
- parser->encoding = p_default_encoding_callback;
- parser->encoding_user_data = 0;
+ return parser->tenv.decode;
+}
+
+void FAXPP_set_decode(FAXPP_Parser *parser, FAXPP_DecodeFunction decode)
+{
+ FAXPP_set_tokenizer_decode(&parser->tenv, decode);
+ parser->decode_needs_setting = 0;
+ if(parser->next_event == nc_unsupported_encoding_next_event) {
+ parser->next_event = parser->main_next_event;
}
- else {
- parser->encoding = callback;
- parser->encoding_user_data = userData;
- }
}
static FAXPP_Error p_reset_parser(FAXPP_ParserEnv *env, int allocate_buffer)
@@ -161,6 +163,7 @@
FAXPP_reset_buffer(&env->stack_buffer);
env->buffered_token = 0;
+ env->decode_needs_setting = 0;
if(allocate_buffer && !env->read_buffer) {
env->read_buffer = malloc(READ_BUFFER_SIZE);
@@ -168,14 +171,7 @@
env->read_buffer_length = READ_BUFFER_SIZE;
}
- switch(env->mode) {
- case NO_CHECKS_PARSE_MODE:
- env->next_event = nc_start_document_next_event;
- break;
- case WELL_FORMED_PARSE_MODE:
- env->next_event = wf_start_document_next_event;
- break;
- }
+ env->next_event = nc_start_document_next_event;
return NO_ERROR;
}
@@ -651,7 +647,6 @@
static FAXPP_Error nc_start_document_next_event(FAXPP_ParserEnv *env)
{
FAXPP_Error err = 0;
- FAXPP_DecodeFunction decode;
p_reset_event(env);
@@ -665,20 +660,61 @@
break;
case XML_DECL_ENCODING_TOKEN:
p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ env->decode_needs_setting = 1;
break;
+ case XML_DECL_ENCODING_UTF8_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ if(env->tenv.decode != FAXPP_utf8_decode)
+ return BAD_ENCODING;
+ break;
+ case XML_DECL_ENCODING_UTF16_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ if(env->tenv.decode != FAXPP_utf16_le_decode &&
+ env->tenv.decode != FAXPP_utf16_be_decode &&
+ env->tenv.decode != FAXPP_utf16_native_decode)
+ return BAD_ENCODING;
+ break;
+ case XML_DECL_ENCODING_UTF16LE_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ if(env->tenv.decode != FAXPP_utf16_le_decode
+#ifndef WORDS_BIGENDIAN
+ && env->tenv.decode != FAXPP_utf16_native_decode
+#endif
+ )
+ return BAD_ENCODING;
+ break;
+ case XML_DECL_ENCODING_UTF16BE_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ if(env->tenv.decode != FAXPP_utf16_be_decode
+#ifdef WORDS_BIGENDIAN
+ && env->tenv.decode != FAXPP_utf16_native_decode
+#endif
+ )
+ return BAD_ENCODING;
+ break;
+ case XML_DECL_ENCODING_UCS4_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ if(env->tenv.decode != FAXPP_ucs4_le_decode &&
+ env->tenv.decode != FAXPP_ucs4_be_decode &&
+ env->tenv.decode != FAXPP_ucs4_native_decode)
+ return BAD_ENCODING;
+ break;
+ case XML_DECL_ENCODING_ISO_8859_1_TOKEN:
+ p_copy_text_from_token(&env->event.encoding, env, /*useTokenBuffer*/0);
+ FAXPP_set_decode(env, FAXPP_iso_8859_1_decode);
+ break;
case XML_DECL_STANDALONE_TOKEN:
p_copy_text_from_token(&env->event.standalone, env, /*useTokenBuffer*/0);
break;
default:
env->buffered_token = 1;
+ env->next_event = env->main_next_event;
+ env->event.type = START_DOCUMENT_EVENT;
- // Invoke the callback function to change the decoder
- decode = env->encoding(env->encoding_user_data, &env->event.encoding, env->tenv.decode);
- if(decode == 0) return UNSUPPORTED_ENCODING;
- FAXPP_set_tokenizer_decode(&env->tenv, decode);
+ if(env->decode_needs_setting) {
+ env->next_event = nc_unsupported_encoding_next_event;
+ }
- env->next_event = nc_next_event;
- env->event.type = START_DOCUMENT_EVENT;
return NO_ERROR;
}
}
@@ -687,10 +723,33 @@
return NO_ERROR;
}
+static FAXPP_Error nc_pi_content_next_event(FAXPP_ParserEnv *env)
+{
+ FAXPP_Error err = 0;
+
+ p_next_token(err, env);
+
+ switch(env->tenv.result_token.type) {
+ case PI_VALUE_TOKEN:
+ p_copy_text_from_token(&env->event.value, env, /*useTokenBuffer*/0);
+ break;
+ default:
+ env->buffered_token = 1;
+ break;
+ }
+
+ env->next_event = env->main_next_event;
+ env->event.type = PI_EVENT;
+ return NO_ERROR;
+}
+
+static FAXPP_Error nc_unsupported_encoding_next_event(FAXPP_ParserEnv *env)
+{
+ return UNSUPPORTED_ENCODING;
+}
+
static FAXPP_Error nc_next_event(FAXPP_ParserEnv *env)
{
- // TBD keep all state in the FAXPP_ParserEnv to allow progressive parse to work correctly - jpcs
-
FAXPP_Error err = 0;
p_reset_event(env);
@@ -809,17 +868,8 @@
p_copy_text_from_token(&env->event.name, env, /*useTokenBuffer*/0);
p_set_location_from_token(env);
- p_next_token(err, env);
-
- if(env->tenv.result_token.type == PI_VALUE_TOKEN) {
- p_copy_text_from_token(&env->event.value, env, /*useTokenBuffer*/0);
- } else {
- env->buffered_token = 1;
- }
-
- env->event.type = PI_EVENT;
- return NO_ERROR;
-
+ env->next_event = nc_pi_content_next_event;
+ return nc_pi_content_next_event(env);
case AMP_ENTITY_REFERENCE_TOKEN:
if(env->current_attr) {
err = p_set_attr_value_name(env->current_attr, env, ENTITY_REFERENCE_EVENT, '&');
@@ -945,6 +995,12 @@
case NO_TOKEN:
case XML_DECL_VERSION_TOKEN:
case XML_DECL_ENCODING_TOKEN:
+ case XML_DECL_ENCODING_UTF8_TOKEN:
+ case XML_DECL_ENCODING_UTF16_TOKEN:
+ case XML_DECL_ENCODING_UTF16LE_TOKEN:
+ case XML_DECL_ENCODING_UTF16BE_TOKEN:
+ case XML_DECL_ENCODING_UCS4_TOKEN:
+ case XML_DECL_ENCODING_ISO_8859_1_TOKEN:
case XML_DECL_STANDALONE_TOKEN:
case PI_VALUE_TOKEN:
break;
@@ -998,6 +1054,8 @@
nsinfo = env->namespace_stack;
while(nsinfo != 0) {
if(p_compare_text(prefix, &nsinfo->prefix) == 0) {
+ if(nsinfo->prefix.len != 0 && nsinfo->uri.len == 0)
+ return NO_URI_FOR_PREFIX;
p_set_text_from_text(uri, &nsinfo->uri);
return NO_ERROR;
}
@@ -1109,21 +1167,6 @@
return result;
}
-static FAXPP_Error wf_start_document_next_event(FAXPP_ParserEnv *env)
-{
- FAXPP_Error err = nc_start_document_next_event(env);
- if(err != 0) return err;
-
- switch(env->event.type) {
- case START_DOCUMENT_EVENT:
- env->next_event = wf_next_event;
- break;
- default: break;
- }
-
- return err;
-}
-
static FAXPP_Error wf_next_event(FAXPP_ParserEnv *env)
{
int i, j;
@@ -1136,7 +1179,11 @@
switch(env->event.type) {
case START_DOCUMENT_EVENT:
+ // Handled in nc_start_document_next_event
break;
+ case PI_EVENT:
+ // Handled in nc_pi_content_next_event
+ break;
case END_DOCUMENT_EVENT:
break;
case START_ELEMENT_EVENT:
@@ -1279,8 +1326,6 @@
break;
case COMMENT_EVENT:
break;
- case PI_EVENT:
- break;
case ENTITY_REFERENCE_EVENT:
/* [WFC: Entity Declared] */
/* [WFC: Parsed Entity] */
Modified: trunk/faxpp/src/xml_parser.h
===================================================================
--- trunk/faxpp/src/xml_parser.h 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/xml_parser.h 2007-08-11 00:29:45 UTC (rev 12)
@@ -49,15 +49,14 @@
typedef struct FAXPP_ParserEnv_s FAXPP_ParserEnv;
+typedef FAXPP_Error (*FAXPP_NextEvent)(FAXPP_ParserEnv *env);
+
struct FAXPP_ParserEnv_s {
- FAXPP_Error (*next_event)(FAXPP_ParserEnv *env);
+ FAXPP_NextEvent next_event;
+ FAXPP_NextEvent main_next_event;
- FAXPP_ParseMode mode;
FAXPP_EncodeFunction encode;
- FAXPP_EncodingCallback encoding;
- void *encoding_user_data;
-
FAXPP_ReadCallback read;
void *read_user_data;
@@ -66,6 +65,7 @@
FAXPP_TokenizerEnv tenv;
unsigned int buffered_token:1;
+ unsigned int decode_needs_setting:1;
unsigned int null_terminate:1;
unsigned int err_line;
Modified: trunk/faxpp/src/xml_tokenizer.c
===================================================================
--- trunk/faxpp/src/xml_tokenizer.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/xml_tokenizer.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -297,6 +297,18 @@
tokenizer->start_element_name_state = utf8_start_element_name_state;
tokenizer->element_content_state = utf8_element_content_state;
}
+ else if(decode == FAXPP_ucs4_native_decode ||
+#ifdef WORDS_BIGENDIAN
+ decode == FAXPP_ucs4_be_decode
+#else
+ decode == FAXPP_ucs4_le_decode
+#endif
+ ) {
+ tokenizer->decode = FAXPP_ucs4_native_decode;
+
+ tokenizer->start_element_name_state = default_start_element_name_state;
+ tokenizer->element_content_state = default_element_content_state;
+ }
else {
tokenizer->decode = decode;
Modified: trunk/faxpp/src/xmldecl.c
===================================================================
--- trunk/faxpp/src/xmldecl.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/src/xmldecl.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -144,7 +144,7 @@
default:
env->state = pi_name_state;
next_char(env);
- if((FAXPP_char_flags(env->current_char) & NAME_CHAR) == 0)
+ if((FAXPP_char_flags(env->current_char) & NCNAME_CHAR) == 0)
return INVALID_CHAR_IN_PI_NAME;
break;
}
@@ -324,21 +324,23 @@
SINGLE_CHAR_STATE(xml_decl_encoding_state5, 'd', 0, xml_decl_encoding_state6, INVALID_CHAR_IN_XML_DECL)
SINGLE_CHAR_STATE(xml_decl_encoding_state6, 'i', 0, xml_decl_encoding_state7, INVALID_CHAR_IN_XML_DECL)
SINGLE_CHAR_STATE(xml_decl_encoding_state7, 'n', 0, xml_decl_encoding_state8, INVALID_CHAR_IN_XML_DECL)
-SINGLE_CHAR_STATE(xml_decl_encoding_state8, 'g', xml_decl_encoding_value_state, equals_state, INVALID_CHAR_IN_XML_DECL)
+SINGLE_CHAR_STATE(xml_decl_encoding_state8, 'g', xml_decl_encoding_value_state1, equals_state, INVALID_CHAR_IN_XML_DECL)
FAXPP_Error
-xml_decl_encoding_value_state(FAXPP_TokenizerEnv *env)
+xml_decl_encoding_value_state1(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
case '"':
- env->state = xml_decl_encoding_value_quot_state1;
+ env->stored_state = xml_decl_encoding_value_quot_state;
+ env->state = xml_decl_encoding_value_state2;
next_char(env);
token_start_position(env);
break;
case '\'':
- env->state = xml_decl_encoding_value_apos_state1;
+ env->stored_state = xml_decl_encoding_value_apos_state;
+ env->state = xml_decl_encoding_value_state2;
next_char(env);
token_start_position(env);
break;
@@ -350,24 +352,35 @@
return NO_ERROR;
}
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
FAXPP_Error
-xml_decl_encoding_value_quot_state1(FAXPP_TokenizerEnv *env)
+xml_decl_encoding_value_state2(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'J': case 'K': case 'L': case 'M':
+ case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'V': case 'W': case 'X': case 'Y': case 'Z':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'j': case 'k': case 'l': case 'm':
+ case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'v': case 'w': case 'x': case 'y': case 'z':
+ retrieve_state(env);
+ break;
+ case 'U':
+ case 'u':
+ env->state = xml_decl_encoding_value_utf_state1;
+ break;
+ case 'I':
+ case 'i':
+ env->state = xml_decl_encoding_value_iso_state1;
+ break;
case '"':
- env->state = xml_decl_standalone_ws_state;
- next_char(env);
+ case '\'':
+ retrieve_state(env);
return INVALID_ENCODING_VALUE;
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M':
- case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm':
- case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
- env->state = xml_decl_encoding_value_quot_state2;
- break;
LINE_ENDINGS
default:
+ retrieve_state(env);
next_char(env);
return INVALID_ENCODING_VALUE;
}
@@ -375,47 +388,356 @@
return NO_ERROR;
}
+#define ENCODING_VALUE_STATE1(name, char1, next_state) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (char1): \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ default: \
+ retrieve_state(env); \
+ break; \
+ } \
+ return NO_ERROR; \
+}
+
+#define ENCODING_VALUE_STATE2(name, char1, char2, next_state) \
+FAXPP_Error \
+name(FAXPP_TokenizerEnv *env) \
+{ \
+ read_char(env); \
+\
+ switch(env->current_char) { \
+ case (char1): \
+ case (char2): \
+ env->state = (next_state); \
+ next_char(env); \
+ break; \
+ default: \
+ retrieve_state(env); \
+ break; \
+ } \
+ return NO_ERROR; \
+}
+
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_utf_state1, 'T', 't', xml_decl_encoding_value_utf_state2)
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_utf_state2, 'F', 'f', xml_decl_encoding_value_utf_state3)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_utf_state3, '-', xml_decl_encoding_value_utf_state4)
+
FAXPP_Error
-xml_decl_encoding_value_quot_state2(FAXPP_TokenizerEnv *env)
+xml_decl_encoding_value_utf_state4(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
+ case '1':
+ env->state = xml_decl_encoding_value_utf16_state1;
+ next_char(env);
+ break;
+ case '8':
+ env->state = xml_decl_encoding_value_utf8_state;
+ next_char(env);
+ break;
+ default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+FAXPP_Error
+xml_decl_encoding_value_utf8_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF8_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
case '"':
- env->state = xml_decl_standalone_ws_state;
- token_end_position(env);
- report_token(XML_DECL_ENCODING_TOKEN, env);
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF8_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
break;
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M':
- case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm':
- case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': case '_': case '-':
+ default:
+ retrieve_state(env);
break;
- LINE_ENDINGS
+ }
+ return NO_ERROR;
+}
+
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_utf16_state1, '6', xml_decl_encoding_value_utf16_state2)
+
+FAXPP_Error
+xml_decl_encoding_value_utf16_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case '"':
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case 'B':
+ case 'b':
+ env->state = xml_decl_encoding_value_utf16be_state1;
+ next_char(env);
+ break;
+ case 'L':
+ case 'l':
+ env->state = xml_decl_encoding_value_utf16le_state1;
+ next_char(env);
+ break;
default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_utf16be_state1, 'E', 'e', xml_decl_encoding_value_utf16be_state2)
+
+FAXPP_Error
+xml_decl_encoding_value_utf16be_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16BE_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case '"':
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16BE_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_utf16le_state1, 'E', 'e', xml_decl_encoding_value_utf16le_state2)
+
+FAXPP_Error
+xml_decl_encoding_value_utf16le_state2(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16LE_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case '"':
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UTF16LE_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+// ISO-10646-UCS-4
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_iso_state1, 'S', 's', xml_decl_encoding_value_iso_state2)
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_iso_state2, 'O', 'o', xml_decl_encoding_value_iso_state3)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_state3, '-', xml_decl_encoding_value_iso_state4)
+
+FAXPP_Error
+xml_decl_encoding_value_iso_state4(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '1':
+ env->state = xml_decl_encoding_value_ucs4_state1;
next_char(env);
- return INVALID_ENCODING_VALUE;
+ break;
+ case '8':
+ env->state = xml_decl_encoding_value_iso_8859_1_state1;
+ next_char(env);
+ break;
+ default:
+ retrieve_state(env);
+ break;
}
- next_char(env);
return NO_ERROR;
}
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state1, '1', xml_decl_encoding_value_ucs4_state2)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state2, '0', xml_decl_encoding_value_ucs4_state3)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state3, '6', xml_decl_encoding_value_ucs4_state4)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state4, '4', xml_decl_encoding_value_ucs4_state5)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state5, '6', xml_decl_encoding_value_ucs4_state6)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state6, '-', xml_decl_encoding_value_ucs4_state7)
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_ucs4_state7, 'U', 'u', xml_decl_encoding_value_ucs4_state8)
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_ucs4_state8, 'C', 'c', xml_decl_encoding_value_ucs4_state9)
+ENCODING_VALUE_STATE2(xml_decl_encoding_value_ucs4_state9, 'S', 's', xml_decl_encoding_value_ucs4_state10)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state10, '-', xml_decl_encoding_value_ucs4_state11)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_ucs4_state11, '4', xml_decl_encoding_value_ucs4_state12)
+
FAXPP_Error
-xml_decl_encoding_value_apos_state1(FAXPP_TokenizerEnv *env)
+xml_decl_encoding_value_ucs4_state12(FAXPP_TokenizerEnv *env)
{
read_char(env);
switch(env->current_char) {
case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UCS4_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case '"':
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_UCS4_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_8859_1_state1, '8', xml_decl_encoding_value_iso_8859_1_state2)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_8859_1_state2, '5', xml_decl_encoding_value_iso_8859_1_state3)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_8859_1_state3, '9', xml_decl_encoding_value_iso_8859_1_state4)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_8859_1_state4, '-', xml_decl_encoding_value_iso_8859_1_state5)
+ENCODING_VALUE_STATE1(xml_decl_encoding_value_iso_8859_1_state5, '1', xml_decl_encoding_value_iso_8859_1_state6)
+
+FAXPP_Error
+xml_decl_encoding_value_iso_8859_1_state6(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '\'':
+ if(env->stored_state == xml_decl_encoding_value_apos_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_ISO_8859_1_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ case '"':
+ if(env->stored_state == xml_decl_encoding_value_quot_state) {
+ env->state = xml_decl_standalone_ws_state;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_ISO_8859_1_TOKEN, env);
+ next_char(env);
+ }
+ else {
+ retrieve_state(env);
+ }
+ break;
+ default:
+ retrieve_state(env);
+ break;
+ }
+ return NO_ERROR;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FAXPP_Error
+xml_decl_encoding_value_quot_state(FAXPP_TokenizerEnv *env)
+{
+ read_char(env);
+
+ switch(env->current_char) {
+ case '"':
env->state = xml_decl_standalone_ws_state;
- next_char(env);
- return INVALID_ENCODING_VALUE;
+ token_end_position(env);
+ report_token(XML_DECL_ENCODING_TOKEN, env);
+ break;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M':
case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm':
case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
- env->state = xml_decl_encoding_value_apos_state2;
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': case '_': case '-':
break;
LINE_ENDINGS
default:
@@ -427,7 +749,7 @@
}
FAXPP_Error
-xml_decl_encoding_value_apos_state2(FAXPP_TokenizerEnv *env)
+xml_decl_encoding_value_apos_state(FAXPP_TokenizerEnv *env)
{
read_char(env);
Modified: trunk/faxpp/tests/xmlconf_runner.c
===================================================================
--- trunk/faxpp/tests/xmlconf_runner.c 2007-08-09 10:29:57 UTC (rev 11)
+++ trunk/faxpp/tests/xmlconf_runner.c 2007-08-11 00:29:45 UTC (rev 12)
@@ -214,7 +214,7 @@
*ptr = 0;
}
-FAXPP_Error run_test_case(const char *filename)
+FAXPP_Error run_test_case(const char *filename, unsigned int *errLine)
{
FAXPP_Parser *testparser = FAXPP_create_parser(WELL_FORMED_PARSE_MODE, FAXPP_utf8_encode);
@@ -225,11 +225,15 @@
}
FAXPP_Error err = FAXPP_init_parse_file(testparser, file);
- if(err != NO_ERROR) error(err, 0, 0);
+ if(err == NO_ERROR) {
+ while((err = FAXPP_next_event(testparser)) == 0) {
+ if(FAXPP_get_current_event(testparser)->type == END_DOCUMENT_EVENT)
+ break;
+ }
+ }
- while((err = FAXPP_next_event(testparser)) == 0) {
- if(FAXPP_get_current_event(testparser)->type == END_DOCUMENT_EVENT)
- break;
+ if(err != 0) {
+ *errLine = FAXPP_get_error_line(testparser);
}
fclose(file);
@@ -247,6 +251,7 @@
char base_buffer[1024];
char file_buffer[1024];
FAXPP_Error result;
+ unsigned int errLine;
int output_events = 0;
int test_failures = 0;
@@ -313,7 +318,7 @@
attr = find_attribute(event, "URI");
calculateBase(base_buffer, &attr->value, file_buffer);
- result = run_test_case(file_buffer);
+ result = run_test_case(file_buffer, &errLine);
if(result == DOCTYPE_NOT_IMPLEMENTED) {
printf("^");
fflush(stdout);
@@ -358,6 +363,10 @@
output_attr_value(&attr->value, stderr);
}
+ if(result != NO_ERROR) {
+ fprintf(stderr, "\nError: %s:%i", FAXPP_err_to_string(result), errLine);
+ }
+
fprintf(stderr, "\n");
if(event->type == SELF_CLOSING_ELEMENT_EVENT) {
fprintf(stderr, "\n");
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|