|
From: <tho...@us...> - 2012-02-11 20:39:56
|
Revision: 696
http://openautomation.svn.sourceforge.net/openautomation/?rev=696&view=rev
Author: thomas_s
Date: 2012-02-11 20:39:47 +0000 (Sat, 11 Feb 2012)
Log Message:
-----------
added determinator object with basic parsing of xml determinators
Modified Paths:
--------------
xPLHAL/branches/thomas_s_dev/src/CMakeLists.txt
xPLHAL/branches/thomas_s_dev/test/CMakeLists.txt
xPLHAL/branches/thomas_s_dev/test/test_devicemanager.cpp
Added Paths:
-----------
xPLHAL/branches/thomas_s_dev/src/determinator.cpp
xPLHAL/branches/thomas_s_dev/src/determinator.h
xPLHAL/branches/thomas_s_dev/src/determinatoritems.cpp
xPLHAL/branches/thomas_s_dev/src/determinatoritems.h
xPLHAL/branches/thomas_s_dev/src/pugiconfig.hpp
xPLHAL/branches/thomas_s_dev/src/pugixml.cpp
xPLHAL/branches/thomas_s_dev/src/pugixml.hpp
xPLHAL/branches/thomas_s_dev/test/determinator1.xml
xPLHAL/branches/thomas_s_dev/test/determinator2.xml
xPLHAL/branches/thomas_s_dev/test/determinatorDesc.xml
xPLHAL/branches/thomas_s_dev/test/test_determinator.cpp
Modified: xPLHAL/branches/thomas_s_dev/src/CMakeLists.txt
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/CMakeLists.txt 2012-02-11 19:40:21 UTC (rev 695)
+++ xPLHAL/branches/thomas_s_dev/src/CMakeLists.txt 2012-02-11 20:39:47 UTC (rev 696)
@@ -17,11 +17,12 @@
project(xPLHAL)
cmake_minimum_required(VERSION 2.6)
-#set(CMAKE_CXX_FLAGS "-g -O2 -std=c++0x")
-set(CMAKE_CXX_FLAGS "-g -Os -std=c++0x")
+set(CMAKE_CXX_FLAGS "-g -std=c++0x")
+#set(CMAKE_CXX_FLAGS "-g -Os -std=c++0x")
set(xPLHAL_SRCS xplmessagequeue.cpp devicemanager.cpp
xplhandler.cpp xplcache.cpp xhcpthread.cpp log.cpp
- xhcp.cpp xplmessage.cpp recurring_timer.cpp main.cpp)
+ xhcp.cpp xplmessage.cpp recurring_timer.cpp main.cpp pugixml.cpp
+ determinatoritems.cpp determinator.cpp)
add_executable(xPLHAL ${xPLHAL_SRCS})
#message(STATUS "Boost_LIBRARIES=${Boost_LIBRARIES}")
Added: xPLHAL/branches/thomas_s_dev/src/determinator.cpp
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/determinator.cpp (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/determinator.cpp 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,111 @@
+#include "determinator.h"
+#include <cxxabi.h>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <typeinfo>
+
+using std::string;
+using std::vector;
+using std::cerr;
+using std::endl;
+
+/**
+ * Dump this determinator to std::cerr for debugging.
+ *
+ * Writes guid, name, description and the enabled flag, followed by the
+ * toString() text of every input and every output item.
+ */
+void Determinator::printDeterminator() const
+{
+    cerr << "Determinator '" << name << "'";
+    cerr << "\n guid: " << guid;
+    cerr << "\n name: " << name;
+    cerr << "\n description: " << description;
+    cerr << "\n enabled: " << enabled;
+
+    cerr << "\n Inputs:\n";
+    // Bind by const reference: copying an entry would copy the key string
+    // and bump the shared_ptr reference count on every iteration.
+    for (const auto& input : inputs) {
+        cerr << " " << input.second->toString() << endl;
+    }
+
+    cerr << "\n Outputs:\n";
+    for (const auto& output : outputs) {
+        cerr << " " << output.second->toString() << endl;
+    }
+
+    cerr << endl;
+}
+
+/**
+ * Set up a parser for the determinator file `filename`.
+ *
+ * Registers one prototype object per known condition and action type and
+ * then loads the XML document into m_doc. The load result is only written
+ * to std::cerr; it is not otherwise acted upon here.
+ */
+DeterminatorXmlParser::DeterminatorXmlParser(const string& filename)
+{
+    // prototypes for condition items
+    registerCondition(std::make_shared<XplCondition>());
+    registerCondition(std::make_shared<GlobalCondition>());
+    registerCondition(std::make_shared<GlobalChanged>());
+    registerCondition(std::make_shared<DayCondition>());
+    registerCondition(std::make_shared<TimeCondition>());
+
+    // prototypes for action items
+    registerAction(std::make_shared<LogAction>());
+
+    const pugi::xml_parse_result load_result = m_doc.load_file(filename.c_str());
+    cerr << "Load result: " << load_result.description() << "\n";
+}
+
+/// Store `condition` as the prototype for its item_name; an existing entry
+/// with the same name is replaced.
+void DeterminatorXmlParser::registerCondition(BaseDeterminatorItemConstPtr condition)
+{
+    const string& key = condition->item_name;
+    m_conditionmap[key] = condition;
+}
+
+/// Store `action` as the prototype for its item_name; an existing entry
+/// with the same name is replaced.
+void DeterminatorXmlParser::registerAction(BaseDeterminatorItemConstPtr action)
+{
+    const string& key = action->item_name;
+    m_actionmap[key] = action;
+}
+
+/**
+ * Build a Determinator from the loaded XML document.
+ *
+ * Reads the attributes of <xplDeterminator>/<determinator> and creates one
+ * item (via the prototype's createNew()) for every child of <input> /
+ * <output> whose element name matches a registered condition / action.
+ * The result is also dumped to std::cerr.
+ *
+ * Any std::exception raised while parsing is logged to std::cerr with its
+ * demangled type name and then rethrown unchanged.
+ *
+ * @return the parsed determinator
+ * @throws DeterminatorParseException if a mandatory node is missing or the
+ *         determinator is marked isGroup="Y" (groups are not handled)
+ */
+Determinator DeterminatorXmlParser::parse()
+{
+    try {
+        const pugi::xml_node base = getNode(m_doc, "xplDeterminator");
+        const pugi::xml_node base_d = getNode(base, "determinator");
+
+        // value() yields a C string. Comparing it to "Y" with == compares
+        // pointers, never the text, so compare as std::string instead.
+        const bool isGroup = string(base_d.attribute("isGroup").value()) == "Y";
+        if (isGroup) {
+            // Formerly control fell off the end of this non-void function
+            // (undefined behaviour). Report the unsupported case explicitly.
+            throw DeterminatorParseException("group determinators are not supported");
+        }
+
+        // Value-initialise so no member (e.g. input_match_type) is left
+        // indeterminate when the object is copied out.
+        Determinator d = Determinator();
+        d.guid = base_d.attribute("guid").value();
+        d.name = base_d.attribute("name").value();
+        d.description = base_d.attribute("description").value();
+        // Was reading the "guid" attribute by mistake.
+        d.enabled = string(base_d.attribute("enabled").value()) == "Y";
+
+        const pugi::xml_node input = base_d.child("input");
+        const pugi::xml_node output = base_d.child("output");
+
+        // inputs/outputs are multimaps, so pick up every child of a given
+        // type, not only the first one.
+        for (const auto& condition : m_conditionmap) {
+            for (pugi::xml_node node = input.child(condition.first.c_str());
+                 node;
+                 node = node.next_sibling(condition.first.c_str())) {
+                d.inputs.insert({condition.first, condition.second->createNew(node)});
+            }
+        }
+        for (const auto& action : m_actionmap) {
+            for (pugi::xml_node node = output.child(action.first.c_str());
+                 node;
+                 node = node.next_sibling(action.first.c_str())) {
+                d.outputs.insert({action.first, action.second->createNew(node)});
+            }
+        }
+
+        d.printDeterminator();
+        return d;
+    }
+    catch (const std::exception& e) {
+        int status = 0;
+        char* realname = abi::__cxa_demangle(typeid(e).name(), 0, 0, &status);
+        // __cxa_demangle returns NULL on failure; fall back to the raw name
+        // instead of streaming a null pointer.
+        cerr << "Exception: " << (realname ? realname : typeid(e).name())
+             << " => " << e.what() << endl;
+        std::free(realname); // the buffer is malloc()ed by __cxa_demangle
+        throw;
+    }
+}
+
+
+/**
+ * Look up the child element `childname` below `base`.
+ *
+ * @return the node returned by base.child(childname)
+ * @throws DeterminatorParseException if that node is empty
+ */
+pugi::xml_node DeterminatorXmlParser::getNode(const pugi::xml_node& base, const string& childname)
+{
+    const pugi::xml_node found = base.child(childname.c_str());
+    if (found) {
+        return found;
+    }
+    throw DeterminatorParseException("node '" + childname +"' not found");
+}
+
Added: xPLHAL/branches/thomas_s_dev/src/determinator.h
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/determinator.h (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/determinator.h 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,41 @@
+#pragma once
+#include "pugixml.hpp"
+#include "determinatoritems.h"
+#include <string>
+#include <vector>
+#include <map>
+
+/**
+ * Data holder for one parsed determinator: its descriptive attributes plus
+ * the input and output items, keyed by item type name.
+ */
+class Determinator
+{
+    public:
+        enum class match_type { ALL, ANY };
+
+        /// Give the non-class members defined start values so that a freshly
+        /// created object can be copied and printed safely.
+        Determinator() : enabled(false), input_match_type(match_type::ALL) {}
+
+        /// Write a textual dump of this determinator to std::cerr.
+        void printDeterminator() const;
+
+        std::string guid;
+        std::string name;
+        std::string description;
+        bool enabled;
+        match_type input_match_type;
+
+        // multimaps: several items of the same type may be stored per key
+        std::multimap<std::string, BaseDeterminatorItemPtr> inputs;
+        std::multimap<std::string, BaseDeterminatorItemPtr> outputs;
+};
+
+/**
+ * Loads a determinator XML file and converts it into a Determinator.
+ *
+ * Item types are pluggable: each registered prototype is looked up by its
+ * item_name and asked to create a new item from the matching XML node.
+ */
+class DeterminatorXmlParser
+{
+    public:
+        /// Register the built-in item prototypes and load `filename`.
+        DeterminatorXmlParser(const std::string& filename);
+
+        /// Add a prototype that parse() looks up below the <input> node.
+        void registerCondition(BaseDeterminatorItemConstPtr condition);
+        /// Add a prototype that parse() looks up below the <output> node.
+        void registerAction(BaseDeterminatorItemConstPtr action);
+        /// Convert the loaded document into a Determinator.
+        Determinator parse();
+
+    private:
+        /// item_name -> prototype object
+        typedef std::map<std::string, BaseDeterminatorItemConstPtr> PrototypeMap;
+
+        /// Return child `childname` of `base`, throwing if it is missing.
+        pugi::xml_node getNode(const pugi::xml_node& base, const std::string& childname);
+
+        pugi::xml_document m_doc;
+        PrototypeMap m_conditionmap;
+        PrototypeMap m_actionmap;
+};
+
Added: xPLHAL/branches/thomas_s_dev/src/determinatoritems.cpp
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/determinatoritems.cpp (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/determinatoritems.cpp 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,277 @@
+#include "determinatoritems.h"
+#include <cxxabi.h>
+#include <iostream>
+#include <typeinfo>
+#include <iostream>
+
+using std::string;
+using std::vector;
+
+/// Keep a copy of `text`; it is what what() reports later.
+ConditionParseException::ConditionParseException(const string& text)
+    : m_text(text)
+{}
+
+/// @return the message given to the constructor (valid while *this lives)
+const char* ConditionParseException::what() const throw()
+{
+    const string& message = m_text;
+    return message.c_str();
+}
+
+/// Keep a copy of `text`; it is what what() reports later.
+DeterminatorParseException::DeterminatorParseException(const string& text)
+    : m_text(text)
+{}
+
+/// @return the message given to the constructor (valid while *this lives)
+const char* DeterminatorParseException::what() const throw()
+{
+    const string& message = m_text;
+    return message.c_str();
+}
+
+
+/**
+ * Helper for reading mandatory attributes of one XML node.
+ */
+class ScopedXmlAttributeGetter
+{
+    public:
+        explicit ScopedXmlAttributeGetter(const pugi::xml_node& basenode) : m_basenode(basenode) {}
+
+        /**
+         * @return the value of attribute `attribute_name`
+         * @throws ConditionParseException if the node has no such attribute
+         */
+        string get(const string& attribute_name) const {
+            const pugi::xml_attribute xml_attribute = m_basenode.attribute(attribute_name.c_str());
+            if (!xml_attribute) {
+                string error_text = string("In node '") + m_basenode.name() + "'";
+                error_text += " attribute '" + attribute_name + "' was not found";
+                throw ConditionParseException(error_text);
+            }
+            return xml_attribute.value();
+        }
+    private:
+        // Hold the node handle by value instead of by reference, so the
+        // getter cannot dangle when it is constructed from a temporary node.
+        pugi::xml_node m_basenode;
+};
+
+/// Create an item that only carries its type name (used for prototypes).
+BaseDeterminatorItem::BaseDeterminatorItem(const string& name)
+    : item_name(name)
+{}
+
+/// Create an item of type `name` and read the mandatory "display_name"
+/// attribute from `basenode` (throws ConditionParseException if missing).
+BaseDeterminatorItem::BaseDeterminatorItem(const pugi::xml_node& basenode, const string& name)
+    : item_name(name)
+{
+    display_name = ScopedXmlAttributeGetter(basenode).get("display_name");
+}
+
+/*
+ * Determinator Conditions
+ */
+
+/// Prototype constructor: only sets the item type name.
+XplCondition::XplCondition()
+    : BaseDeterminatorItem("xplCondition")
+{}
+
+/// Construct from an XML node and parse all its attributes and parameters.
+XplCondition::XplCondition(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "xplCondition")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new XplCondition from `basenode`.
+BaseDeterminatorItemPtr XplCondition::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new XplCondition(basenode));
+    return created;
+}
+
+/**
+ * Read the message filter attributes and all <param> children of `basenode`.
+ *
+ * Every attribute read here is mandatory; a missing one makes the attribute
+ * getter throw ConditionParseException.
+ */
+void XplCondition::parseFromXml(const pugi::xml_node& basenode)
+{
+    const ScopedXmlAttributeGetter attrs(basenode);
+    msg_type = attrs.get("msg_type");
+    source_vendor = attrs.get("source_vendor");
+    source_device = attrs.get("source_device");
+    source_instance = attrs.get("source_instance");
+    target_vendor = attrs.get("target_vendor");
+    target_device = attrs.get("target_device");
+    target_instance = attrs.get("target_instance");
+    schema_class = attrs.get("schema_class");
+    schema_type = attrs.get("schema_type");
+
+    // walk all direct children and keep the ones named "param"
+    for (pugi::xml_node::iterator it = basenode.begin(); it != basenode.end(); ++it) {
+        const pugi::xml_node child = *it;
+        if (string("param") != child.name()) {
+            continue;
+        }
+        const ScopedXmlAttributeGetter param_attrs(child);
+        struct parameter entry;
+        entry.name = param_attrs.get("name");
+        entry.op = param_attrs.get("operator");
+        entry.value = param_attrs.get("value");
+        parameter.push_back(entry);
+    }
+}
+
+/**
+ * @return a multi-line description listing every filter attribute and one
+ *         "parameter:" line (name, operator, value concatenated) per entry
+ */
+string XplCondition::toString() const
+{
+    string ret = "xplCondition:";
+    ret += "\nmsg_type: " + msg_type;
+    ret += "\nsource_vendor: " + source_vendor;
+    ret += "\nsource_device: " + source_device;
+    ret += "\nsource_instance: " + source_instance;
+    ret += "\ntarget_vendor: " + target_vendor;
+    ret += "\ntarget_device: " + target_device;
+    ret += "\ntarget_instance: " + target_instance;
+    ret += "\nschema_class: " + schema_class;
+    ret += "\nschema_type: " + schema_type;
+    // const reference: avoids copying three strings per parameter
+    for (const auto& p : parameter) {
+        ret += "\nparameter: " + p.name + p.op + p.value;
+    }
+    return ret;
+}
+
+
+/// Prototype constructor: only sets the item type name.
+GlobalCondition::GlobalCondition()
+    : BaseDeterminatorItem("globalCondition")
+{}
+
+/// Construct from an XML node and read its attributes.
+GlobalCondition::GlobalCondition(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "globalCondition")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new GlobalCondition from `basenode`.
+BaseDeterminatorItemPtr GlobalCondition::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new GlobalCondition(basenode));
+    return created;
+}
+
+/// Read the "name", "operator" and "value" attributes of `basenode`.
+void GlobalCondition::parseFromXml(const pugi::xml_node& basenode)
+{
+    const pugi::xml_attribute name_attr = basenode.attribute("name");
+    const pugi::xml_attribute op_attr = basenode.attribute("operator");
+    const pugi::xml_attribute value_attr = basenode.attribute("value");
+    name = name_attr.value();
+    op = op_attr.value();
+    value = value_attr.value();
+}
+
+/// @return item name followed by one line per attribute
+string GlobalCondition::toString() const
+{
+    string text(item_name);
+    text.append(":");
+    text.append("\nname: ").append(name);
+    text.append("\noperator: ").append(op);
+    text.append("\nvalue: ").append(value);
+    return text;
+}
+
+/// Prototype constructor: only sets the item type name.
+GlobalChanged::GlobalChanged()
+    : BaseDeterminatorItem("globalChanged")
+{}
+
+/// Construct from an XML node and read its attributes.
+GlobalChanged::GlobalChanged(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "globalChanged")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new GlobalChanged from `basenode`.
+BaseDeterminatorItemPtr GlobalChanged::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new GlobalChanged(basenode));
+    return created;
+}
+
+/// Read the "name" attribute of `basenode`.
+void GlobalChanged::parseFromXml(const pugi::xml_node& basenode)
+{
+    const pugi::xml_attribute name_attr = basenode.attribute("name");
+    name = name_attr.value();
+}
+
+/// @return item name followed by the "name" line
+string GlobalChanged::toString() const
+{
+    string text(item_name);
+    text.append(":");
+    text.append("\nname: ").append(name);
+    return text;
+}
+
+
+/// Prototype constructor: only sets the item type name.
+DayCondition::DayCondition()
+    : BaseDeterminatorItem("dayCondition")
+{}
+
+/// Construct from an XML node and read its attributes.
+DayCondition::DayCondition(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "dayCondition")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new DayCondition from `basenode`.
+BaseDeterminatorItemPtr DayCondition::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new DayCondition(basenode));
+    return created;
+}
+
+/// Read the "dow" attribute of `basenode`.
+void DayCondition::parseFromXml(const pugi::xml_node& basenode)
+{
+    const pugi::xml_attribute dow_attr = basenode.attribute("dow");
+    dow = dow_attr.value();
+}
+
+/// @return item name followed by the "dow" line
+string DayCondition::toString() const
+{
+    string text(item_name);
+    text.append(":");
+    text.append("\ndow: ").append(dow);
+    return text;
+}
+
+/// Prototype constructor: only sets the item type name.
+TimeCondition::TimeCondition()
+    : BaseDeterminatorItem("timeCondition")
+{}
+
+/// Construct from an XML node and read its attributes.
+TimeCondition::TimeCondition(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "timeCondition")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new TimeCondition from `basenode`.
+BaseDeterminatorItemPtr TimeCondition::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new TimeCondition(basenode));
+    return created;
+}
+
+/// Read the "operator" and "value" attributes of `basenode`.
+void TimeCondition::parseFromXml(const pugi::xml_node& basenode)
+{
+    const pugi::xml_attribute op_attr = basenode.attribute("operator");
+    const pugi::xml_attribute value_attr = basenode.attribute("value");
+    op = op_attr.value();
+    value = value_attr.value();
+}
+
+/// @return item name followed by the operator and value lines
+string TimeCondition::toString() const
+{
+    string text(item_name);
+    text.append(":");
+    text.append("\noperator: ").append(op);
+    text.append("\nvalue...: ").append(value);
+    return text;
+}
+
+/*
+ * Determinator Actions
+ */
+
+/// Prototype constructor: only sets the item type name.
+LogAction::LogAction()
+    : BaseDeterminatorItem("logAction")
+{}
+
+/// Construct from an XML node and read its attributes.
+LogAction::LogAction(const pugi::xml_node& basenode)
+    : BaseDeterminatorItem(basenode, "logAction")
+{
+    parseFromXml(basenode);
+}
+
+/// Factory used by the parser: create a new LogAction from `basenode`.
+BaseDeterminatorItemPtr LogAction::createNew(const pugi::xml_node& basenode) const
+{
+    BaseDeterminatorItemPtr created(new LogAction(basenode));
+    return created;
+}
+
+/// Read the "logText" and "executeOrder" attributes of `basenode`.
+void LogAction::parseFromXml(const pugi::xml_node& basenode)
+{
+    const pugi::xml_attribute text_attr = basenode.attribute("logText");
+    const pugi::xml_attribute order_attr = basenode.attribute("executeOrder");
+    logText = text_attr.value();
+    executeOrder = order_attr.value();
+}
+
+/// @return item name followed by the logText and executeOrder lines
+std::string LogAction::toString() const
+{
+    string text(item_name);
+    text.append(":");
+    text.append("\nlogText.....: ").append(logText);
+    text.append("\nexecuteOrder: ").append(executeOrder);
+    return text;
+}
+
Added: xPLHAL/branches/thomas_s_dev/src/determinatoritems.h
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/determinatoritems.h (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/determinatoritems.h 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,149 @@
+#pragma once
+#include "pugixml.hpp"
+#include <string>
+#include <vector>
+#include <memory>
+
+/**
+ * Raised when a condition/action item cannot be built from its XML node,
+ * e.g. because a mandatory attribute is missing.
+ */
+class ConditionParseException : public std::exception
+{
+    public:
+        /// @param text message reported by what()
+        ConditionParseException(const std::string& text);
+        virtual ~ConditionParseException() throw() {}
+
+        /// @return the message passed to the constructor
+        virtual const char* what() const throw();
+
+    private:
+        std::string m_text;
+};
+
+/**
+ * Raised when the overall determinator document cannot be parsed,
+ * e.g. because a mandatory node is missing.
+ */
+class DeterminatorParseException : public std::exception
+{
+    public:
+        /// @param text message reported by what()
+        DeterminatorParseException(const std::string& text);
+        virtual ~DeterminatorParseException() throw() {}
+
+        /// @return the message passed to the constructor
+        virtual const char* what() const throw();
+
+    private:
+        std::string m_text;
+};
+
+class BaseDeterminatorItem;
+typedef std::shared_ptr<BaseDeterminatorItem> BaseDeterminatorItemPtr;
+typedef std::shared_ptr<const BaseDeterminatorItem> BaseDeterminatorItemConstPtr;
+
+/**
+ * Common interface of all determinator items (conditions and actions).
+ *
+ * An instance is either a prototype (created with only a name and used as a
+ * factory through createNew()) or a concrete item parsed from an XML node.
+ */
+class BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor: sets item_name only.
+        BaseDeterminatorItem(const std::string& name);
+        /// Sets item_name and reads "display_name" from `basenode`.
+        BaseDeterminatorItem(const pugi::xml_node& basenode, const std::string& name);
+
+        // Polymorphic base class: make destruction through a base pointer
+        // well defined regardless of how the object is owned.
+        virtual ~BaseDeterminatorItem() {}
+
+        /// Fill the item's fields from the attributes/children of `basenode`.
+        virtual void parseFromXml(const pugi::xml_node& basenode) = 0;
+        /// Create a new item of the same dynamic type from `basenode`.
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const = 0;
+        /// Human readable description of the item.
+        virtual std::string toString() const = 0;
+
+        std::string item_name;
+        std::string display_name;
+
+        //boost::signal2::signal<void ()> sigChanged;
+};
+
+/**
+ * Item "xplCondition": message type, source, target and schema fields plus
+ * a list of parameter comparisons, all kept as strings.
+ */
+class XplCondition : public BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor.
+        XplCondition();
+        /// Parse the item from `basenode`.
+        XplCondition(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+
+        virtual std::string toString() const;
+
+        /* connect to signal of new xpl-message */
+        std::string msg_type;
+        std::string source_vendor;
+        std::string source_device;
+        std::string source_instance;
+        std::string target_vendor;
+        std::string target_device;
+        std::string target_instance;
+        std::string schema_class;
+        std::string schema_type;
+
+        /// One <param> child: name, operator and value as read from XML.
+        struct parameter {
+            std::string name;
+            std::string op;
+            std::string value;
+        };
+
+        // Shares its name with the struct above, hence the elaborated
+        // type specifier `struct parameter`.
+        std::vector<struct parameter> parameter;
+};
+
+/**
+ * Item "globalCondition": holds the name / operator / value attributes
+ * read from its XML node.
+ */
+class GlobalCondition : public BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor.
+        GlobalCondition();
+        /// Parse the item from `basenode`.
+        GlobalCondition(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+
+        virtual std::string toString() const;
+
+        std::string name;
+        std::string op;
+        std::string value;
+};
+
+/**
+ * Item "globalChanged": holds the "name" attribute read from its XML node.
+ */
+class GlobalChanged : public BaseDeterminatorItem
+{
+    public:
+        /* connect to signal of changed global variable */
+        /// Prototype constructor.
+        GlobalChanged();
+        /// Parse the item from `basenode`.
+        GlobalChanged(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+        virtual std::string toString() const;
+
+        std::string name;
+};
+
+/**
+ * Item "dayCondition": holds the "dow" attribute read from its XML node.
+ */
+class DayCondition : public BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor.
+        DayCondition();
+        /// Parse the item from `basenode`.
+        DayCondition(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+        virtual std::string toString() const;
+
+        std::string dow;
+};
+
+/**
+ * Item "timeCondition": holds the "operator" and "value" attributes read
+ * from its XML node.
+ */
+class TimeCondition : public BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor.
+        TimeCondition();
+        /// Parse the item from `basenode`.
+        TimeCondition(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+        virtual std::string toString() const;
+
+        std::string op;
+        std::string value;
+};
+
+/**
+ * Item "logAction": holds the "logText" and "executeOrder" attributes read
+ * from its XML node.
+ */
+class LogAction : public BaseDeterminatorItem
+{
+    public:
+        /// Prototype constructor.
+        LogAction();
+        /// Parse the item from `basenode`.
+        LogAction(const pugi::xml_node& basenode);
+
+        virtual BaseDeterminatorItemPtr createNew(const pugi::xml_node& basenode) const;
+        virtual void parseFromXml(const pugi::xml_node& basenode);
+        virtual std::string toString() const;
+
+        std::string logText;
+        std::string executeOrder;
+};
Added: xPLHAL/branches/thomas_s_dev/src/pugiconfig.hpp
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/pugiconfig.hpp (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/pugiconfig.hpp 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,62 @@
+/**
+ * pugixml parser - version 1.0
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2010, by Arseny Kapoulkine (ars...@gm...)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kr...@ti...)
+ */
+
+#ifndef HEADER_PUGICONFIG_HPP
+#define HEADER_PUGICONFIG_HPP
+
+// Uncomment this to enable wchar_t mode
+// #define PUGIXML_WCHAR_MODE
+
+// Uncomment this to disable XPath
+#define PUGIXML_NO_XPATH
+
+// Uncomment this to disable STL
+// Note: you can't use XPath with PUGIXML_NO_STL
+// #define PUGIXML_NO_STL
+
+// Uncomment this to disable exceptions
+// Note: you can't use XPath with PUGIXML_NO_EXCEPTIONS
+// #define PUGIXML_NO_EXCEPTIONS
+
+// Set this to control attributes for public classes/functions, i.e.:
+// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
+// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
+// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
+// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
+
+#endif
+
+/**
+ * Copyright (c) 2006-2010 Arseny Kapoulkine
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
Added: xPLHAL/branches/thomas_s_dev/src/pugixml.cpp
===================================================================
--- xPLHAL/branches/thomas_s_dev/src/pugixml.cpp (rev 0)
+++ xPLHAL/branches/thomas_s_dev/src/pugixml.cpp 2012-02-11 20:39:47 UTC (rev 696)
@@ -0,0 +1,9576 @@
+/**
+ * pugixml parser - version 1.0
+ * --------------------------------------------------------
+ * Copyright (C) 2006-2010, by Arseny Kapoulkine (ars...@gm...)
+ * Report bugs and download new versions at http://pugixml.org/
+ *
+ * This library is distributed under the MIT License. See notice at the end
+ * of this file.
+ *
+ * This work is based on the pugxml parser, which is:
+ * Copyright (C) 2003, by Kristen Wegner (kr...@ti...)
+ */
+
+#include "pugixml.hpp"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <setjmp.h>
+#include <wchar.h>
+
+#ifndef PUGIXML_NO_XPATH
+# include <math.h>
+# include <float.h>
+#endif
+
+#ifndef PUGIXML_NO_STL
+# include <istream>
+# include <ostream>
+# include <string>
+#endif
+
+// For placement new
+#include <new>
+
+#ifdef _MSC_VER
+# pragma warning(disable: 4127) // conditional expression is constant
+# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
+# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
+# pragma warning(disable: 4702) // unreachable code
+# pragma warning(disable: 4996) // this function or variable may be unsafe
+#endif
+
+#ifdef __INTEL_COMPILER
+# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 1478 1786) // function was declared "deprecated"
+#endif
+
+#ifdef __BORLANDC__
+# pragma warn -8008 // condition is always false
+# pragma warn -8066 // unreachable code
+#endif
+
+#ifdef __SNC__
+# pragma diag_suppress=178 // function was declared but never referenced
+# pragma diag_suppress=237 // controlling expression is constant
+#endif
+
+// uintptr_t
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+# include <stdint.h>
+#else
+# if _MSC_VER < 1300
+// No native uintptr_t in MSVC6
+typedef size_t uintptr_t;
+# endif
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef __int32 int32_t;
+#endif
+
+// Inlining controls
+#if defined(_MSC_VER) && _MSC_VER >= 1300
+# define PUGIXML_NO_INLINE __declspec(noinline)
+#elif defined(__GNUC__)
+# define PUGIXML_NO_INLINE __attribute__((noinline))
+#else
+# define PUGIXML_NO_INLINE
+#endif
+
+// Simple static assertion
+#define STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
+
+// Digital Mars C++ bug workaround for passing char loaded from memory via stack
+#ifdef __DMC__
+# define DMC_VOLATILE volatile
+#else
+# define DMC_VOLATILE
+#endif
+
+using namespace pugi;
+
+// Memory allocation
+namespace
+{
+ // Default allocation hook: forwards to malloc()
+ void* default_allocate(size_t size)
+ {
+ return malloc(size);
+ }
+
+ // Default deallocation hook: forwards to free()
+ void default_deallocate(void* ptr)
+ {
+ free(ptr);
+ }
+
+ // Currently active allocation hooks; initialised to the malloc/free defaults above
+ allocation_function global_allocate = default_allocate;
+ deallocation_function global_deallocate = default_deallocate;
+}
+
+// String utilities
+namespace
+{
+ // Get string length (wcslen in PUGIXML_WCHAR_MODE, strlen otherwise); s must not be null
+ size_t strlength(const char_t* s)
+ {
+ assert(s);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcslen(s);
+ #else
+ return strlen(s);
+ #endif
+ }
+
+ // Compare two null-terminated strings for equality; both must be non-null
+ bool strequal(const char_t* src, const char_t* dst)
+ {
+ assert(src && dst);
+
+ #ifdef PUGIXML_WCHAR_MODE
+ return wcscmp(src, dst) == 0;
+ #else
+ return strcmp(src, dst) == 0;
+ #endif
+ }
+
+ // Compare null-terminated lhs with the range [rhs, rhs + count):
+ // true only if the first count characters match and lhs ends right after them
+ bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
+ {
+ for (size_t i = 0; i < count; ++i)
+ if (lhs[i] != rhs[i])
+ return false;
+
+ return lhs[count] == 0;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Convert string to wide string, assuming all symbols are ASCII
+ // (dest must have room for the source length plus the terminator)
+ void widen_ascii(wchar_t* dest, const char* source)
+ {
+ for (const char* i = source; *i; ++i) *dest++ = *i;
+ *dest = 0;
+ }
+#endif
+}
+
+#if !defined(PUGIXML_NO_STL) || !defined(PUGIXML_NO_XPATH)
+// auto_ptr-like buffer holder for exception recovery
+namespace
+{
+ // Owns a raw buffer together with the function that frees it
+ struct buffer_holder
+ {
+ void* data;
+ void (*deleter)(void*);
+
+ buffer_holder(void* data, void (*deleter)(void*)): data(data), deleter(deleter)
+ {
+ }
+
+ // Frees the buffer unless ownership was given up via release()
+ ~buffer_holder()
+ {
+ if (data) deleter(data);
+ }
+
+ // Hand the buffer to the caller; the destructor will no longer free it
+ void* release()
+ {
+ void* result = data;
+ data = 0;
+ return result;
+ }
+ };
+}
+#endif
+
+namespace
+{
+ static const size_t xml_memory_page_size = 32768;
+
+ static const uintptr_t xml_memory_page_alignment = 32;
+ static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
+ static const uintptr_t xml_memory_page_name_allocated_mask = 16;
+ static const uintptr_t xml_memory_page_value_allocated_mask = 8;
+ static const uintptr_t xml_memory_page_type_mask = 7;
+
+ struct xml_allocator;
+
+ // Header of one allocation page; the payload starts at `data`
+ struct xml_memory_page
+ {
+ // Treat `memory` as a page header and reset all bookkeeping fields; returns 0 for null input
+ static xml_memory_page* construct(void* memory)
+ {
+ if (!memory) return 0; //$ redundant, left for performance
+
+ xml_memory_page* result = static_cast<xml_memory_page*>(memory);
+
+ result->allocator = 0;
+ result->memory = 0;
+ result->prev = 0;
+ result->next = 0;
+ result->busy_size = 0;
+ result->freed_size = 0;
+
+ return result;
+ }
+
+ xml_allocator* allocator;
+
+ // unaligned pointer as returned by global_allocate (see xml_allocator::allocate_page)
+ void* memory;
+
+ xml_memory_page* prev;
+ xml_memory_page* next;
+
+ // bytes handed out from / returned to this page
+ size_t busy_size;
+ size_t freed_size;
+
+ // first byte of the payload; pages are allocated with offsetof(xml_memory_page, data) + data_size bytes
+ char data[1];
+ };
+
+ // Header stored directly in front of every string allocated by xml_allocator::allocate_string
+ struct xml_memory_string_header
+ {
+ uint16_t page_offset; // offset from page->data
+ uint16_t full_size; // 0 if string occupies whole page
+ };
+
+ // Page based allocator: memory is handed out linearly from the current page (_root);
+ // _busy_size caches the number of bytes used in _root->data
+ struct xml_allocator
+ {
+ xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
+ {
+ }
+
+ // Allocate a new page with data_size payload bytes; returns 0 if global_allocate fails
+ xml_memory_page* allocate_page(size_t data_size)
+ {
+ size_t size = offsetof(xml_memory_page, data) + data_size;
+
+ // allocate block with some alignment, leaving memory for worst-case padding
+ void* memory = global_allocate(size + xml_memory_page_alignment);
+ if (!memory) return 0;
+
+ // align upwards to page boundary
+ void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(memory) + (xml_memory_page_alignment - 1)) & ~(xml_memory_page_alignment - 1));
+
+ // prepare page structure
+ xml_memory_page* page = xml_memory_page::construct(page_memory);
+
+ // keep the unaligned pointer so that deallocate_page can free the block
+ page->memory = memory;
+ page->allocator = _root->allocator;
+
+ return page;
+ }
+
+ static void deallocate_page(xml_memory_page* page)
+ {
+ global_deallocate(page->memory);
+ }
+
+ void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
+
+ // Allocate size bytes and report the owning page in out_page;
+ // requests that do not fit into the current page are forwarded to allocate_memory_oob
+ void* allocate_memory(size_t size, xml_memory_page*& out_page)
+ {
+ if (_busy_size + size > xml_memory_page_size) return allocate_memory_oob(size, out_page);
+
+ void* buf = _root->data + _busy_size;
+
+ _busy_size += size;
+
+ out_page = _root;
+
+ return buf;
+ }
+
+ // Account size bytes at ptr as freed; a page whose bytes are all freed is
+ // either reset (last page in the list) or unlinked and deallocated
+ void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
+ {
+ // the current page keeps its size in _busy_size; sync it before checking
+ if (page == _root) page->busy_size = _busy_size;
+
+ assert(ptr >= page->data && ptr < page->data + page->busy_size);
+ (void)!ptr;
+
+ page->freed_size += size;
+ assert(page->freed_size <= page->busy_size);
+
+ if (page->freed_size == page->busy_size)
+ {
+ if (page->next == 0)
+ {
+ assert(_root == page);
+
+ // top page freed, just reset sizes
+ page->busy_size = page->freed_size = 0;
+ _busy_size = 0;
+ }
+ else
+ {
+ assert(_root != page);
+ assert(page->prev);
+
+ // remove from the list
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ // deallocate
+ deallocate_page(page);
+ }
+ }
+ }
+
+ // Allocate storage for length characters preceded by an xml_memory_string_header;
+ // returns a pointer to the character storage or 0 on allocation failure
+ char_t* allocate_string(size_t length)
+ {
+ // allocate memory for string and header block
+ size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
+
+ // round size up to pointer alignment boundary
+ size_t full_size = (size + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1);
+
+ xml_memory_page* page;
+ xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
+
+ if (!header) return 0;
+
+ // setup header
+ ptrdiff_t page_offset = reinterpret_cast<char*>(header) - page->data;
+
+ assert(page_offset >= 0 && page_offset < (1 << 16));
+ header->page_offset = static_cast<uint16_t>(page_offset);
+
+ // full_size == 0 for large strings that occupy the whole page
+ assert(full_size < (1 << 16) || (page->busy_size == full_size && page_offset == 0));
+ header->full_size = static_cast<uint16_t>(full_size < (1 << 16) ? full_size : 0);
+
+ return reinterpret_cast<char_t*>(header + 1);
+ }
+
+ // Free a string obtained from allocate_string; the owning page is
+ // recovered from the offset stored in the string header
+ void deallocate_string(char_t* string)
+ {
+ // get header
+ xml_memory_string_header* header = reinterpret_cast<xml_memory_string_header*>(string) - 1;
+
+ // deallocate
+ size_t page_offset = offsetof(xml_memory_page, data) + header->page_offset;
+ xml_memory_page* page = reinterpret_cast<xml_memory_page*>(reinterpret_cast<char*>(header) - page_offset);
+
+ // if full_size == 0 then this string occupies the whole page
+ size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size;
+
+ deallocate_memory(header, full_size, page);
+ }
+
+ xml_memory_page* _root;
+ size_t _busy_size;
+ };
+
+ // Slow path of allocate_memory: the request does not fit into the current page.
+ // Requests up to a quarter of the page size get a fresh standard page, which becomes
+ // the new current page; larger requests get a dedicated page of exactly `size` bytes.
+ // Returns 0 if the page cannot be allocated.
+ PUGIXML_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
+ {
+ const size_t large_allocation_threshold = xml_memory_page_size / 4;
+
+ xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
+ if (!page) return 0;
+
+ if (size <= large_allocation_threshold)
+ {
+ // store the cached size back into the page that stops being current
+ _root->busy_size = _busy_size;
+
+ // insert page at the end of linked list
+ page->prev = _root;
+ _root->next = page;
+ _root = page;
+
+ _busy_size = size;
+ }
+ else
+ {
+ // insert page before the end of linked list, so that it is deleted as soon as possible
+ // the last page is not deleted even if it's empty (see deallocate_memory)
+ assert(_root->prev);
+
+ page->prev = _root->prev;
+ page->next = _root;
+
+ _root->prev->next = page;
+ _root->prev = page;
+ }
+
+ // allocate inside page
+ page->busy_size = size;
+
+ out_page = page;
+ return page->data;
+ }
+}
+
+namespace pugi
+{
+ /// A 'name=value' XML attribute structure.
+ struct xml_attribute_struct
+ {
+ /// Default ctor
+ /// \param page - memory page this attribute was allocated from (stored in header)
+ xml_attribute_struct(xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
+ {
+ }
+
+ uintptr_t header; ///< Page pointer; low bits are tested against the xml_memory_page_*_allocated_mask flags
+
+ char_t* name; ///< Pointer to attribute name.
+ char_t* value; ///< Pointer to attribute value.
+
+ xml_attribute_struct* prev_attribute_c; ///< Previous attribute (cyclic list)
+ xml_attribute_struct* next_attribute; ///< Next attribute
+ };
+
+ /// An XML document tree node.
+ struct xml_node_struct
+ {
+ /// Default ctor
+ /// \param page - memory page this node was allocated from (stored in header)
+ /// \param type - node type
+ xml_node_struct(xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), parent(0), name(0), value(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
+ {
+ }
+
+ uintptr_t header; ///< Page pointer combined with (type - 1) in the low bits
+
+ xml_node_struct* parent; ///< Pointer to parent
+
+ char_t* name; ///< Pointer to element name.
+ char_t* value; ///< Pointer to any associated string data.
+
+ xml_node_struct* first_child; ///< First child
+
+ xml_node_struct* prev_sibling_c; ///< Left brother (cyclic list)
+ xml_node_struct* next_sibling; ///< Right brother
+
+ xml_attribute_struct* first_attribute; ///< First attribute
+ };
+}
+
+namespace
+{
+ /// Document root: a node of type node_document that is also the allocator, plus a buffer pointer.
+ struct xml_document_struct: public xml_node_struct, public xml_allocator
+ {
+     /// \param page - memory page handed to both the node base and the allocator base
+     xml_document_struct(xml_memory_page* page)
+         : xml_node_struct(page, node_document)
+         , xml_allocator(page)
+         , buffer(0)
+     {
+     }
+
+     const char_t* buffer; ///< Starts out as 0; assigned outside this struct
+ };
+
+ /// Returns the allocator registered on the memory page that owns the given node.
+ /// \param node - node to look up; must not be null (asserted)
+ static inline xml_allocator& get_allocator(const xml_node_struct* node)
+ {
+     assert(node);
+
+     // mask the header word down to the page address
+     xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask);
+
+     return *owner->allocator;
+ }
+}
+
+// Low-level DOM operations
+namespace
+{
+ /// Obtains storage for one attribute from the allocator and constructs the attribute in it.
+ /// \param alloc - allocator that supplies the memory and reports the owning page
+ /// \return the new attribute, or 0 when the allocator could not supply memory
+ inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
+ {
+     xml_memory_page* page;
+     void* memory = alloc.allocate_memory(sizeof(xml_attribute_struct), page);
+
+     // the allocator's page-allocation path returns 0 on failure; constructing into a null
+     // pointer is undefined behaviour, and callers already test this function's result for 0
+     if (!memory) return 0;
+
+     return new (memory) xml_attribute_struct(page);
+ }
+
+ /// Obtains storage for one node from the allocator and constructs a node of the given type in it.
+ /// \param alloc - allocator that supplies the memory and reports the owning page
+ /// \param type - node type passed to the node constructor
+ /// \return the new node, or 0 when the allocator could not supply memory
+ inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
+ {
+     xml_memory_page* page;
+     void* memory = alloc.allocate_memory(sizeof(xml_node_struct), page);
+
+     // the allocator's page-allocation path returns 0 on failure; constructing into a null
+     // pointer is undefined behaviour, and callers already test this function's result for 0
+     if (!memory) return 0;
+
+     return new (memory) xml_node_struct(page, type);
+ }
+
+ /// Releases an attribute: its name and value strings when the header flags mark them as
+ /// allocated, then the attribute's own storage.
+ /// \param a - attribute to release
+ /// \param alloc - allocator the strings and the attribute are returned to
+ inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
+ {
+     const uintptr_t flags = a->header;
+     xml_memory_page* owner = reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask);
+
+     if (flags & xml_memory_page_name_allocated_mask)
+         alloc.deallocate_string(a->name);
+
+     if (flags & xml_memory_page_value_allocated_mask)
+         alloc.deallocate_string(a->value);
+
+     alloc.deallocate_memory(a, sizeof(xml_attribute_struct), owner);
+ }
+
+ /// Releases a node together with everything below it: its allocated name/value strings,
+ /// all of its attributes, all of its children (recursively) and finally the node itself.
+ /// \param n - node to release
+ /// \param alloc - allocator everything is returned to
+ inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
+ {
+     const uintptr_t flags = n->header;
+
+     if (flags & xml_memory_page_name_allocated_mask)
+         alloc.deallocate_string(n->name);
+
+     if (flags & xml_memory_page_value_allocated_mask)
+         alloc.deallocate_string(n->value);
+
+     // fetch the successor before each destroy call, the current element is gone afterwards
+     xml_attribute_struct* attr = n->first_attribute;
+
+     while (attr)
+     {
+         xml_attribute_struct* following = attr->next_attribute;
+         destroy_attribute(attr, alloc);
+         attr = following;
+     }
+
+     xml_node_struct* child = n->first_child;
+
+     while (child)
+     {
+         xml_node_struct* following = child->next_sibling;
+         destroy_node(child, alloc);
+         child = following;
+     }
+
+     alloc.deallocate_memory(n, sizeof(xml_node_struct), reinterpret_cast<xml_memory_page*>(flags & xml_memory_page_pointer_mask));
+ }
+
+ /// Allocates a node of the given type and links it in as the last child of `node`.
+ /// \param node - parent that receives the new child
+ /// \param alloc - allocator used for the new node
+ /// \param type - type of the new node
+ /// \return the new child, or 0 if allocation failed
+ PUGIXML_NO_INLINE xml_node_struct* append_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
+ {
+     xml_node_struct* child = allocate_node(alloc, type);
+     if (!child) return 0;
+
+     child->parent = node;
+
+     xml_node_struct* head = node->first_child;
+
+     if (!head)
+     {
+         // only child: it is its own "previous" in the cyclic back-link
+         node->first_child = child;
+         child->prev_sibling_c = child;
+         return child;
+     }
+
+     // the head's back-link always designates the current tail
+     xml_node_struct* tail = head->prev_sibling_c;
+
+     tail->next_sibling = child;
+     child->prev_sibling_c = tail;
+     head->prev_sibling_c = child;
+
+     return child;
+ }
+
+ /// Allocates an attribute and links it in as the last attribute of `node`.
+ /// \param node - node that receives the new attribute
+ /// \param alloc - allocator used for the new attribute
+ /// \return the new attribute, or 0 if allocation failed
+ PUGIXML_NO_INLINE xml_attribute_struct* append_attribute_ll(xml_node_struct* node, xml_allocator& alloc)
+ {
+     xml_attribute_struct* a = allocate_attribute(alloc);
+     if (!a) return 0;
+
+     xml_attribute_struct* head = node->first_attribute;
+
+     if (!head)
+     {
+         // only attribute: it is its own "previous" in the cyclic back-link
+         node->first_attribute = a;
+         a->prev_attribute_c = a;
+         return a;
+     }
+
+     // the head's back-link always designates the current tail
+     xml_attribute_struct* tail = head->prev_attribute_c;
+
+     tail->next_attribute = a;
+     a->prev_attribute_c = tail;
+     head->prev_attribute_c = a;
+
+     return a;
+ }
+}
+
+// Helper classes for code generation
+namespace
+{
+ // Compile-time "false" tag (value == 0); it is the default opt_swap argument of utf_decoder.
+ struct opt_false
+ {
+ enum { value = 0 };
+ };
+
+ // Compile-time "true" tag (value == 1); passed as opt_swap to request byte-swapped input decoding.
+ struct opt_true
+ {
+ enum { value = 1 };
+ };
+}
+
+// Unicode utilities
+namespace
+{
+ /// Returns the 16-bit value with its two bytes exchanged.
+ inline uint16_t endian_swap(uint16_t value)
+ {
+     const unsigned int low_byte = value & 0xff;
+     const unsigned int high_byte = value >> 8;
+
+     return static_cast<uint16_t>((low_byte << 8) | high_byte);
+ }
+
+ /// Returns the 32-bit value with its four bytes in reverse order.
+ inline uint32_t endian_swap(uint32_t value)
+ {
+     const uint32_t b0 = value & 0xff;
+     const uint32_t b1 = (value >> 8) & 0xff;
+     const uint32_t b2 = (value >> 16) & 0xff;
+     const uint32_t b3 = value >> 24;
+
+     return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+ }
+
+ /// Decoder traits that count how many UTF-8 bytes the decoded code points would occupy.
+ struct utf8_counter
+ {
+     typedef size_t value_type;
+
+     /// Adds the UTF-8 length of a code point below U+10000 to the running total.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         size_t bytes = 3; // U+0800..U+FFFF
+
+         if (ch < 0x80) bytes = 1; // U+0000..U+007F
+         else if (ch < 0x800) bytes = 2; // U+0080..U+07FF
+
+         return result + bytes;
+     }
+
+     /// Adds the UTF-8 length of a code point in U+10000..U+10FFFF (always four bytes).
+     static value_type high(value_type result, uint32_t)
+     {
+         return result + 4;
+     }
+ };
+
+ /// Decoder traits that write code points as UTF-8 bytes and return the advanced output pointer.
+ struct utf8_writer
+ {
+     typedef uint8_t* value_type;
+
+     /// Encodes a code point below U+10000 using one, two or three bytes.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         if (ch < 0x80)
+         {
+             // U+0000..U+007F
+             *result++ = static_cast<uint8_t>(ch);
+             return result;
+         }
+
+         if (ch < 0x800)
+         {
+             // U+0080..U+07FF
+             *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
+             *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+             return result;
+         }
+
+         // U+0800..U+FFFF
+         *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
+         *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+         *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+         return result;
+     }
+
+     /// Encodes a code point in U+10000..U+10FFFF using four bytes.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
+         *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
+         *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
+         *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
+         return result;
+     }
+
+     /// Encodes any code point, choosing low() or high() by its magnitude.
+     static value_type any(value_type result, uint32_t ch)
+     {
+         if (ch < 0x10000) return low(result, ch);
+
+         return high(result, ch);
+     }
+ };
+
+ /// Decoder traits that count how many UTF-16 code units the decoded code points would occupy.
+ struct utf16_counter
+ {
+     typedef size_t value_type;
+
+     /// A code point below U+10000 takes one unit.
+     static value_type low(value_type result, uint32_t)
+     {
+         return ++result;
+     }
+
+     /// A code point from U+10000 upwards takes two units (a surrogate pair).
+     static value_type high(value_type result, uint32_t)
+     {
+         result += 2;
+         return result;
+     }
+ };
+
+ /// Decoder traits that write code points as UTF-16 code units and return the advanced output pointer.
+ struct utf16_writer
+ {
+     typedef uint16_t* value_type;
+
+     /// Stores a code point below U+10000 as a single unit.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         *result++ = static_cast<uint16_t>(ch);
+         return result;
+     }
+
+     /// Stores a code point from U+10000 upwards as a lead/trail surrogate pair.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         const uint32_t offset = ch - 0x10000;
+         const uint32_t lead_bits = offset >> 10;
+         const uint32_t trail_bits = offset & 0x3ff;
+
+         *result++ = static_cast<uint16_t>(0xD800 + lead_bits);
+         *result++ = static_cast<uint16_t>(0xDC00 + trail_bits);
+         return result;
+     }
+
+     /// Stores any code point, choosing low() or high() by its magnitude.
+     static value_type any(value_type result, uint32_t ch)
+     {
+         if (ch < 0x10000) return low(result, ch);
+
+         return high(result, ch);
+     }
+ };
+
+ /// Decoder traits that count UTF-32 code units: every code point takes exactly one.
+ struct utf32_counter
+ {
+     typedef size_t value_type;
+
+     /// Counts one unit for a code point below U+10000.
+     static value_type low(value_type result, uint32_t)
+     {
+         return ++result;
+     }
+
+     /// Counts one unit for a code point from U+10000 upwards.
+     static value_type high(value_type result, uint32_t)
+     {
+         return ++result;
+     }
+ };
+
+ /// Decoder traits that write code points as UTF-32 code units and return the advanced output pointer.
+ struct utf32_writer
+ {
+     typedef uint32_t* value_type;
+
+     /// Stores a code point below U+10000 unchanged.
+     static value_type low(value_type result, uint32_t ch)
+     {
+         *result++ = ch;
+         return result;
+     }
+
+     /// Stores a code point from U+10000 upwards unchanged.
+     static value_type high(value_type result, uint32_t ch)
+     {
+         *result++ = ch;
+         return result;
+     }
+
+     /// Stores any code point unchanged.
+     static value_type any(value_type result, uint32_t ch)
+     {
+         *result++ = ch;
+         return result;
+     }
+ };
+
+ // Maps sizeof(wchar_t) to a same-sized unsigned code-unit type and the matching counter/writer traits.
+ // Only sizes 2 and 4 are specialised; any other size leaves the template incomplete.
+ template <size_t size> struct wchar_selector;
+
+ // 2-byte wchar_t: UTF-16 code units
+ template <> struct wchar_selector<2>
+ {
+ typedef uint16_t type;
+ typedef utf16_counter counter;
+ typedef utf16_writer writer;
+ };
+
+ // 4-byte wchar_t: UTF-32 code units
+ template <> struct wchar_selector<4>
+ {
+ typedef uint32_t type;
+ typedef utf32_counter counter;
+ typedef utf32_writer writer;
+ };
+
+ // Counter and writer traits selected for this platform's wchar_t
+ typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
+ typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
+
+ // Decodes blocks of UTF-8/16/32 code units. Every decoded code point is handed to Traits::low
+ // (values below U+10000) or Traits::high (values from U+10000 upwards); `result` is threaded
+ // through those calls and the final value is returned. When opt_swap::value is non-zero the
+ // UTF-16/UTF-32 input units are byte-swapped before they are interpreted.
+ template <typename Traits, typename opt_swap = opt_false> struct utf_decoder
+ {
+ // Decodes `size` bytes of UTF-8. Bytes that do not start a well-formed sequence are skipped
+ // one at a time without emitting anything.
+ static inline typename Traits::value_type decode_utf8_block(const uint8_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint8_t utf8_byte_mask = 0x3f;
+
+ while (size)
+ {
+ uint8_t lead = *data;
+
+ // 0xxxxxxx -> U+0000..U+007F
+ if (lead < 0x80)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ size -= 1;
+
+ // process aligned single-byte (ascii) blocks
+ // (four bytes are read as one uint32_t; all are ascii when no byte has its top bit set)
+ if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
+ {
+ while (size >= 4 && (*reinterpret_cast<const uint32_t*>(data) & 0x80808080) == 0)
+ {
+ result = Traits::low(result, data[0]);
+ result = Traits::low(result, data[1]);
+ result = Traits::low(result, data[2]);
+ result = Traits::low(result, data[3]);
+ data += 4;
+ size -= 4;
+ }
+ }
+ }
+ // 110xxxxx -> U+0080..U+07FF
+ else if ((unsigned)(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
+ data += 2;
+ size -= 2;
+ }
+ // 1110xxxx -> U+0800-U+FFFF
+ else if ((unsigned)(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
+ {
+ result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
+ data += 3;
+ size -= 3;
+ }
+ // 11110xxx -> U+10000..U+10FFFF
+ else if ((unsigned)(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
+ {
+ result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
+ data += 4;
+ size -= 4;
+ }
+ // 10xxxxxx or 11111xxx -> invalid
+ // (also reached when a lead byte lacks enough valid continuation bytes)
+ else
+ {
+ data += 1;
+ size -= 1;
+ }
+ }
+
+ return result;
+ }
+
+ // Decodes `size` 16-bit units of UTF-16. A surrogate that is not part of a valid
+ // lead/trail pair is skipped without emitting anything.
+ static inline typename Traits::value_type decode_utf16_block(const uint16_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint16_t* end = data + size;
+
+ while (data < end)
+ {
+ uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+D7FF
+ if (lead < 0xD800)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+E000..U+FFFF
+ else if ((unsigned)(lead - 0xE000) < 0x2000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // surrogate pair lead
+ else if ((unsigned)(lead - 0xD800) < 0x400 && data + 1 < end)
+ {
+ uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
+
+ if ((unsigned)(next - 0xDC00) < 0x400)
+ {
+ result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
+ data += 2;
+ }
+ else
+ {
+ data += 1;
+ }
+ }
+ else
+ {
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+
+ // Decodes `size` 32-bit units of UTF-32. No range validation is done here: every unit is
+ // emitted, through Traits::low when below 0x10000 and through Traits::high otherwise.
+ static inline typename Traits::value_type decode_utf32_block(const uint32_t* data, size_t size, typename Traits::value_type result)
+ {
+ const uint32_t* end = data + size;
+
+ while (data < end)
+ {
+ uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
+
+ // U+0000..U+FFFF
+ if (lead < 0x10000)
+ {
+ result = Traits::low(result, lead);
+ data += 1;
+ }
+ // U+10000..U+10FFFF
+ else
+ {
+ result = Traits::high(result, lead);
+ data += 1;
+ }
+ }
+
+ return result;
+ }
+ };
+
+ /// Writes the byte-swapped form of each of `length` units from `data` into `result`.
+ /// Each element is read before it is written, so `result` may be the same array as `data`.
+ template <typename T> inline void convert_utf_endian_swap(T* result, const T* data, size_t length)
+ {
+     size_t index = 0;
+
+     while (index < length)
+     {
+         result[index] = endian_swap(data[index]);
+         ++index;
+     }
+ }
+
+ /// Writes the byte-swapped form of each of `length` wide characters from `data` into `result`.
+ /// Each character is swapped through the unsigned code-unit type of the same size as wchar_t.
+ inline void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
+ {
+     typedef wchar_selector<sizeof(wchar_t)>::type unit_type;
+
+     size_t index = 0;
+
+     while (index < length)
+     {
+         const unit_type unit = static_cast<unit_type>(data[index]);
+
+         result[index] = static_cast<wchar_t>(endian_swap(unit));
+         ++index;
+     }
+ }
+}
+
+namespace
+{
+ // Bit flags stored per character in chartype_table; each trailing comment lists the
+ // characters that carry the flag.
+ enum chartype_t
+ {
+ ct_parse_pcdata = 1, // \0, &, \r, <
+ ct_parse_attr = 2, // \0, &, \r, ', "
+ ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab
+ ct_space = 8, // \r, \n, space, tab
+ ct_parse_cdata = 16, // \0, ], >, \r
+ ct_parse_comment = 32, // \0, -, >, \r
+ ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
+ ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, :
+ };
+
+ // Classification table indexed by character code; each entry is an OR of chartype_t flags.
+ // Queried through the IS_CHARTYPE macro.
+ const unsigned char chartype_table[256] =
+ {
+ 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31
+ 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47
+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95
+ 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127
+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
+ 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
+ };
+
+ // Bit flags stored per character in chartypex_table; each trailing comment lists the
+ // characters that carry the flag.
+ enum chartypex_t
+ {
+ ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
+ ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, "
+ ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _
+ ctx_digit = 8, // 0-9
+ ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
+ };
+
+ // Classification table indexed by character code; each entry is an OR of chartypex_t flags.
+ // Queried through the IS_CHARTYPEX macro.
+ const unsigned char chartypex_table[256] =
+ {
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
+ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
+
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
+ 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
+ };
+
+#ifdef PUGIXML_WCHAR_MODE
+ // Wide-character build: codes below 128 index the table directly, every other code uses entry 128.
+ #define IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
+#else
+ // Narrow build: the character is converted to unsigned char and used as the table index.
+ #define IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
+#endif
+
+ // Non-zero when character c carries flag(s) ct in chartype_table / chartypex_table respectively.
+ #define IS_CHARTYPE(c, ct) IS_CHARTYPE_IMPL(c, ct, chartype_table)
+ #define IS_CHARTYPEX(c, ct) IS_CHARTYPE_IMPL(c, ct, chartypex_table)
+
+ /// Reports whether the lowest-addressed byte of an unsigned int holding 1 is itself 1,
+ /// i.e. whether this machine stores the least significant byte first.
+ bool is_little_endian()
+ {
+     unsigned int probe = 1;
+     const unsigned char* first_byte = reinterpret_cast<unsigned char*>(&probe);
+
+     return *first_byte == 1;
+ }
+
+ /// Returns the concrete encoding that corresponds to wchar_t on this platform:
+ /// UTF-16 for a 2-byte wchar_t, UTF-32 for a 4-byte one, in the machine's byte order.
+ xml_encoding get_wchar_encoding()
+ {
+     STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
+
+     const bool little = is_little_endian();
+
+     if (sizeof(wchar_t) == 2)
+     {
+         return little ? encoding_utf16_le : encoding_utf16_be;
+     }
+
+     return little ? encoding_utf32_le : encoding_utf32_be;
+ }
+
+ // Guesses the encoding of a document from its first four bytes d0..d3.
+ // The checks run in order and the first match wins, so the 4-byte patterns must stay ahead of
+ // the 2-byte patterns that share their prefix (e.g. the UTF-32 LE BOM starts with the UTF-16 LE BOM).
+ // Falls back to UTF-8 when nothing matches.
+ xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
+ {
+ // look for BOM in first few bytes
+ if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
+ if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
+ if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
+ if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
+
+ // look for <, <? or <?xm in various encodings
+ if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
+ if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
+ if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
+ if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
+ if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
+
+ // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
+ if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
+ if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
+
+ // no known BOM detected, assume utf8
+ return encoding_utf8;
+ }
+
+ /// Resolves a requested encoding to a concrete one.
+ /// Generic requests (wchar, utf16, utf32) become the variant matching this machine; any other
+ /// explicit request is returned unchanged; encoding_auto is resolved by inspecting the buffer.
+ /// \param encoding - encoding requested by the caller
+ /// \param contents - start of the input buffer (read only for encoding_auto with size >= 4)
+ /// \param size - buffer size in bytes
+ xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
+ {
+     switch (encoding)
+     {
+     case encoding_wchar:
+         // wchar is replaced by the utf implementation of matching width
+         return get_wchar_encoding();
+
+     case encoding_utf16:
+         return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+     case encoding_utf32:
+         return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+     case encoding_auto:
+         // autodetection is handled below
+         break;
+
+     default:
+         // an explicit concrete encoding is honoured as is
+         return encoding;
+     }
+
+     // fewer than four bytes is not enough to autodetect anything
+     if (size < 4) return encoding_utf8;
+
+     // guess from the first four bytes (XML specification, Appendix F.1)
+     const uint8_t* bytes = static_cast<const uint8_t*>(contents);
+
+     DMC_VOLATILE uint8_t d0 = bytes[0], d1 = bytes[1], d2 = bytes[2], d3 = bytes[3];
+
+     return guess_buffer_encoding(d0, d1, d2, d3);
+ }
+
+ /// Produces a writable char_t buffer for input that already has the native encoding.
+ /// A mutable input is used in place; otherwise a copy is made with global_allocate.
+ /// \param out_buffer - receives the writable buffer
+ /// \param out_length - receives the buffer length in char_t units (size / sizeof(char_t))
+ /// \param contents - input data
+ /// \param size - input size in bytes
+ /// \param is_mutable - true when `contents` may be written to directly
+ /// \return false only when the copy could not be allocated
+ bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+     if (is_mutable)
+     {
+         out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
+     }
+     else
+     {
+         // at least one byte is requested so that an empty input still gets a buffer
+         void* buffer = global_allocate(size > 0 ? size : 1);
+         if (!buffer) return false;
+
+         // memcpy needs valid pointers even for a zero-length copy, and an empty input may
+         // come with a null `contents`, so skip the call when there is nothing to copy
+         if (size > 0)
+         {
+             assert(contents);
+             memcpy(buffer, contents, size);
+         }
+
+         out_buffer = static_cast<char_t*>(buffer);
+     }
+
+     out_length = size / sizeof(char_t);
+
+     return true;
+ }
+
+#ifdef PUGIXML_WCHAR_MODE
+ /// True when the two encodings are the same UTF width (16 or 32) but opposite byte orders.
+ inline bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
+ {
+     if (le == encoding_utf16_be) return re == encoding_utf16_le;
+     if (le == encoding_utf16_le) return re == encoding_utf16_be;
+     if (le == encoding_utf32_be) return re == encoding_utf32_le;
+     if (le == encoding_utf32_le) return re == encoding_utf32_be;
+
+     return false;
+ }
+
+ /// Produces a char_t buffer holding the input with every unit byte-swapped.
+ /// A mutable input is swapped in place; otherwise the result goes into a buffer from global_allocate.
+ /// \param out_buffer - receives the buffer
+ /// \param out_length - receives the length in char_t units (size / sizeof(char_t))
+ /// \param contents - input data in the opposite byte order
+ /// \param size - input size in bytes
+ /// \param is_mutable - true when `contents` may be overwritten
+ /// \return false only when the output buffer could not be allocated
+ bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
+ {
+     const char_t* source = static_cast<const char_t*>(contents);
+
+     out_buffer = is_mutable
+         ? const_cast<char_t*>(source)
+         : static_cast<char_t*>(global_allocate(size > 0 ? size : 1));
+
+     // only a freshly allocated buffer can be missing
+     if (!is_mutable && !out_buffer) return false;
+
+     out_length = size / sizeof(char_t);
+
+     convert_wchar_endian_swap(out_buffer, source, out_length);
+
+     return true;
+ }
+
+ /// Converts UTF-8 input into a newly allocated wchar_t buffer.
+ /// \param out_buffer - receives the buffer from global_allocate
+ /// \param out_length - receives the number of wchar_t units produced
+ /// \param contents - UTF-8 input
+ /// \param size - input size in bytes
+ /// \return false only when the output buffer could not be allocated
+ bool convert_buffer_utf8(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size)
+ {
+     typedef wchar_writer::value_type cursor_type;
+
+     const uint8_t* input = static_cast<const uint8_t*>(contents);
+
+     // pass 1: measure the output in wchar_t units
+     out_length = utf_decoder<wchar_counter>::decode_utf8_block(input, size, 0);
+
+     // never request a zero-sized block
+     const size_t units_to_allocate = out_length > 0 ? out_length : 1;
+
+     out_buffer = static_cast<char_t*>(global_allocate(units_to_allocate * sizeof(char_t)));
+     if (!out_buffer) return false;
+
+     // pass 2: decode into the buffer
+     cursor_type first = reinterpret_cast<cursor_type>(out_buffer);
+     cursor_type last = utf_decoder<wchar_writer>::decode_utf8_block(input, size, first);
+
+     assert(last == first + out_length);
+     (void)!last;
+
+     return true;
+ }
+
+ /// Converts UTF-16 input into a newly allocated wchar_t buffer.
+ /// The opt_swap tag type selects whether input units are byte-swapped while decoding.
+ /// \param out_buffer - receives the buffer from global_allocate
+ /// \param out_length - receives the number of wchar_t units produced
+ /// \param contents - UTF-16 input
+ /// \param size - input size in bytes (a trailing odd byte is ignored)
+ /// \return false only when the output buffer could not be allocated
+ template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     typedef wchar_writer::value_type cursor_type;
+
+     const uint16_t* input = static_cast<const uint16_t*>(contents);
+     const size_t input_units = size / sizeof(uint16_t);
+
+     // pass 1: measure the output in wchar_t units
+     out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf16_block(input, input_units, 0);
+
+     // never request a zero-sized block
+     const size_t units_to_allocate = out_length > 0 ? out_length : 1;
+
+     out_buffer = static_cast<char_t*>(global_allocate(units_to_allocate * sizeof(char_t)));
+     if (!out_buffer) return false;
+
+     // pass 2: decode into the buffer
+     cursor_type first = reinterpret_cast<cursor_type>(out_buffer);
+     cursor_type last = utf_decoder<wchar_writer, opt_swap>::decode_utf16_block(input, input_units, first);
+
+     assert(last == first + out_length);
+     (void)!last;
+
+     return true;
+ }
+
+ /// Converts UTF-32 input into a newly allocated wchar_t buffer.
+ /// The opt_swap tag type selects whether input units are byte-swapped while decoding.
+ /// \param out_buffer - receives the buffer from global_allocate
+ /// \param out_length - receives the number of wchar_t units produced
+ /// \param contents - UTF-32 input
+ /// \param size - input size in bytes (trailing bytes short of a full unit are ignored)
+ /// \return false only when the output buffer could not be allocated
+ template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     typedef wchar_writer::value_type cursor_type;
+
+     const uint32_t* input = static_cast<const uint32_t*>(contents);
+     const size_t input_units = size / sizeof(uint32_t);
+
+     // pass 1: measure the output in wchar_t units
+     out_length = utf_decoder<wchar_counter, opt_swap>::decode_utf32_block(input, input_units, 0);
+
+     // never request a zero-sized block
+     const size_t units_to_allocate = out_length > 0 ? out_length : 1;
+
+     out_buffer = static_cast<char_t*>(global_allocate(units_to_allocate * sizeof(char_t)));
+     if (!out_buffer) return false;
+
+     // pass 2: decode into the buffer
+     cursor_type first = reinterpret_cast<cursor_type>(out_buffer);
+     cursor_type last = utf_decoder<wchar_writer, opt_swap>::decode_utf32_block(input, input_units, first);
+
+     assert(last == first + out_length);
+     (void)!last;
+
+     return true;
+ }
+
+ /// Wide-character build: turns input in the given concrete encoding into a wchar_t buffer.
+ /// \param out_buffer - receives the resulting buffer
+ /// \param out_length - receives its length in wchar_t units
+ /// \param encoding - concrete encoding of the input
+ /// \param contents - input data
+ /// \param size - input size in bytes
+ /// \param is_mutable - true when `contents` may be reused or overwritten instead of copied
+ /// \return false on allocation failure or (after asserting) for an unsupported encoding
+ bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+     const xml_encoding native_wchar = get_wchar_encoding();
+
+     // input already is native wchar_t data
+     if (encoding == native_wchar)
+         return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+     // same width, opposite byte order
+     if (need_endian_swap_utf(encoding, native_wchar))
+         return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
+
+     if (encoding == encoding_utf8)
+         return convert_buffer_utf8(out_buffer, out_length, contents, size);
+
+     if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+     {
+         const xml_encoding machine_utf16 = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+         // swap while decoding only when the input byte order differs from the machine's
+         if (encoding == machine_utf16)
+             return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
+     {
+         const xml_encoding machine_utf32 = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
+
+         if (encoding == machine_utf32)
+             return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_false());
+
+         return convert_buffer_utf32(out_buffer, out_length, contents, size, opt_true());
+     }
+
+     assert(!"Invalid encoding");
+     return false;
+ }
+#else
+ /// Converts UTF-16 input into a newly allocated UTF-8 buffer.
+ /// The opt_swap tag type selects whether input units are byte-swapped while decoding.
+ /// \param out_buffer - receives the buffer from global_allocate
+ /// \param out_length - receives the number of UTF-8 bytes produced
+ /// \param contents - UTF-16 input
+ /// \param size - input size in bytes (a trailing odd byte is ignored)
+ /// \return false only when the output buffer could not be allocated
+ template <typename opt_swap> bool convert_buffer_utf16(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint16_t* input = static_cast<const uint16_t*>(contents);
+     const size_t input_units = size / sizeof(uint16_t);
+
+     // pass 1: measure the output in utf8 units
+     out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf16_block(input, input_units, 0);
+
+     // never request a zero-sized block
+     const size_t units_to_allocate = out_length > 0 ? out_length : 1;
+
+     out_buffer = static_cast<char_t*>(global_allocate(units_to_allocate * sizeof(char_t)));
+     if (!out_buffer) return false;
+
+     // pass 2: decode into the buffer
+     uint8_t* first = reinterpret_cast<uint8_t*>(out_buffer);
+     uint8_t* last = utf_decoder<utf8_writer, opt_swap>::decode_utf16_block(input, input_units, first);
+
+     assert(last == first + out_length);
+     (void)!last;
+
+     return true;
+ }
+
+ /// Converts UTF-32 input into a newly allocated UTF-8 buffer.
+ /// The opt_swap tag type selects whether input units are byte-swapped while decoding.
+ /// \param out_buffer - receives the buffer from global_allocate
+ /// \param out_length - receives the number of UTF-8 bytes produced
+ /// \param contents - UTF-32 input
+ /// \param size - input size in bytes (trailing bytes short of a full unit are ignored)
+ /// \return false only when the output buffer could not be allocated
+ template <typename opt_swap> bool convert_buffer_utf32(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, opt_swap)
+ {
+     const uint32_t* input = static_cast<const uint32_t*>(contents);
+     const size_t input_units = size / sizeof(uint32_t);
+
+     // pass 1: measure the output in utf8 units
+     out_length = utf_decoder<utf8_counter, opt_swap>::decode_utf32_block(input, input_units, 0);
+
+     // never request a zero-sized block
+     const size_t units_to_allocate = out_length > 0 ? out_length : 1;
+
+     out_buffer = static_cast<char_t*>(global_allocate(units_to_allocate * sizeof(char_t)));
+     if (!out_buffer) return false;
+
+     // pass 2: decode into the buffer
+     uint8_t* first = reinterpret_cast<uint8_t*>(out_buffer);
+     uint8_t* last = utf_decoder<utf8_writer, opt_swap>::decode_utf32_block(input, input_units, first);
+
+     assert(last == first + out_length);
+     (void)!last;
+
+     return true;
+ }
+
+ bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
+ {
+ // fast path: no conversion required
+ if (encoding == encoding_utf8) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
+
+ // source encoding is utf16
+ if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
+ {
+ xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
+
+ return (native_encoding == encoding) ?
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_false()) :
+ convert_buffer_utf16(out_buffer, out_length, contents, size, opt_true());
+ }
+
+ // source encodin...
[truncated message content] |