From: <vac...@us...> - 2009-06-02 16:42:52
|
Revision: 150 http://xmlwrapp.svn.sourceforge.net/xmlwrapp/?rev=150&view=rev Author: vaclavslavik Date: 2009-06-02 16:42:51 +0000 (Tue, 02 Jun 2009) Log Message: ----------- make extern "C" callback functions static so that they are not exported as globally visible symbols Modified Paths: -------------- trunk/src/libxml/dtd_impl.cxx trunk/src/libxml/event_parser.cxx trunk/src/libxml/init.cxx trunk/src/libxml/tree_parser.cxx trunk/src/libxslt/init.cxx trunk/src/libxslt/stylesheet.cxx Modified: trunk/src/libxml/dtd_impl.cxx =================================================================== --- trunk/src/libxml/dtd_impl.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxml/dtd_impl.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -51,10 +51,24 @@ using namespace xml::impl; //#################################################################### -namespace { - extern "C" void dtd_error (void *ctxt, const char *message, ...); - extern "C" void dtd_warning (void *ctxt, const char*, ...); -} // end anonymous namespace +extern "C" +{ + //#################################################################### + static void dtd_error (void *ctxt, const char *message, ...) { + dtd_impl *dtd = static_cast<dtd_impl*>(ctxt); + + va_list ap; + va_start(ap, message); + printf2string(dtd->error_, message, ap); + va_end(ap); + } + //#################################################################### + static void dtd_warning (void *ctxt, const char*, ...) 
{ + dtd_impl *dtd = static_cast<dtd_impl*>(ctxt); + ++dtd->warnings_; + } + //#################################################################### +} //#################################################################### dtd_impl::dtd_impl (const char *filename) : warnings_(0), dtd_(0) { if ( (dtd_ = xmlParseDTD(0, reinterpret_cast<const xmlChar*>(filename))) == 0) { @@ -89,22 +103,3 @@ dtd_ = 0; return xmldtd; } -//#################################################################### -namespace { - //#################################################################### - extern "C" void dtd_error (void *ctxt, const char *message, ...) { - dtd_impl *dtd = static_cast<dtd_impl*>(ctxt); - - va_list ap; - va_start(ap, message); - printf2string(dtd->error_, message, ap); - va_end(ap); - } - //#################################################################### - extern "C" void dtd_warning (void *ctxt, const char*, ...) { - dtd_impl *dtd = static_cast<dtd_impl*>(ctxt); - ++dtd->warnings_; - } - //#################################################################### -} -//#################################################################### Modified: trunk/src/libxml/event_parser.cxx =================================================================== --- trunk/src/libxml/event_parser.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxml/event_parser.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -70,18 +70,8 @@ //#################################################################### namespace { const std::size_t const_buffer_size = 4096; +} - extern "C" xmlEntityPtr cb_get_entity (void *, const xmlChar *name); - extern "C" void cb_start_element (void *parser, const xmlChar *tag, const xmlChar **props); - extern "C" void cb_end_element (void *parser, const xmlChar *tag); - extern "C" void cb_text (void *parser, const xmlChar *text, int length); - extern "C" void cb_pi (void *parser, const xmlChar *target, const xmlChar *data); - extern "C" void cb_comment (void *parser, const 
xmlChar *text); - extern "C" void cb_cdata (void *parser, const xmlChar *text, int length); - extern "C" void cb_warning (void *parser, const char *message, ...); - extern "C" void cb_error (void *parser, const char *message, ...); - extern "C" void cb_ignore (void*, const xmlChar*, int); -} // end anonymous namespace //#################################################################### struct xml::impl::epimpl { public: @@ -107,6 +97,55 @@ epimpl (const epimpl&); epimpl& operator= (const epimpl&); }; + +extern "C" +{ + //#################################################################### + static void cb_start_element (void *parser, const xmlChar *tag, const xmlChar **props) + { static_cast<epimpl*>(parser)->event_start_element(tag, props); } + //#################################################################### + static void cb_end_element (void *parser, const xmlChar *tag) + { static_cast<epimpl*>(parser)->event_end_element(tag); } + //#################################################################### + static void cb_text (void *parser, const xmlChar *text, int length) + { static_cast<epimpl*>(parser)->event_text(text, length); } + //#################################################################### + static void cb_pi (void *parser, const xmlChar *target, const xmlChar *data) + { static_cast<epimpl*>(parser)->event_pi(target, data); } + //#################################################################### + static void cb_comment (void *parser, const xmlChar *text) + { static_cast<epimpl*>(parser)->event_comment(text); } + //#################################################################### + static void cb_cdata (void *parser, const xmlChar *text, int length) + { static_cast<epimpl*>(parser)->event_cdata(text, length); } + //#################################################################### + static void cb_warning (void *parser, const char *message, ...) 
{ + std::string complete_message; + + va_list ap; + va_start(ap, message); + printf2string(complete_message, message, ap); + va_end(ap); + + static_cast<epimpl*>(parser)->event_warning(complete_message); + } + //#################################################################### + static void cb_error (void *parser, const char *message, ...) { + std::string complete_message; + + va_list ap; + va_start(ap, message); + printf2string(complete_message, message, ap); + va_end(ap); + + static_cast<epimpl*>(parser)->event_error(complete_message); + } + //#################################################################### + static void cb_ignore (void*, const xmlChar*, int) { + return; + } +} // extern "C" + //#################################################################### xml::event_parser::event_parser (void) { pimpl_ = new epimpl(*this); @@ -302,53 +341,3 @@ parser_status_ = false; xmlStopParser(parser_context_); } -//#################################################################### -namespace { - //#################################################################### - extern "C" xmlEntityPtr cb_get_entity (void *, const xmlChar *name) - { return xmlGetPredefinedEntity(name); } - //#################################################################### - extern "C" void cb_start_element (void *parser, const xmlChar *tag, const xmlChar **props) - { static_cast<epimpl*>(parser)->event_start_element(tag, props); } - //#################################################################### - extern "C" void cb_end_element (void *parser, const xmlChar *tag) - { static_cast<epimpl*>(parser)->event_end_element(tag); } - //#################################################################### - extern "C" void cb_text (void *parser, const xmlChar *text, int length) - { static_cast<epimpl*>(parser)->event_text(text, length); } - //#################################################################### - extern "C" void cb_pi (void *parser, const xmlChar *target, const xmlChar 
*data) - { static_cast<epimpl*>(parser)->event_pi(target, data); } - //#################################################################### - extern "C" void cb_comment (void *parser, const xmlChar *text) - { static_cast<epimpl*>(parser)->event_comment(text); } - //#################################################################### - extern "C" void cb_cdata (void *parser, const xmlChar *text, int length) - { static_cast<epimpl*>(parser)->event_cdata(text, length); } - //#################################################################### - extern "C" void cb_warning (void *parser, const char *message, ...) { - std::string complete_message; - - va_list ap; - va_start(ap, message); - printf2string(complete_message, message, ap); - va_end(ap); - - static_cast<epimpl*>(parser)->event_warning(complete_message); - } - //#################################################################### - extern "C" void cb_error (void *parser, const char *message, ...) { - std::string complete_message; - - va_list ap; - va_start(ap, message); - printf2string(complete_message, message, ap); - va_end(ap); - - static_cast<epimpl*>(parser)->event_error(complete_message); - } - //#################################################################### - extern "C" void cb_ignore (void*, const xmlChar*, int) { - return; - } -} // end anonymous namespace Modified: trunk/src/libxml/init.cxx =================================================================== --- trunk/src/libxml/init.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxml/init.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -43,8 +43,12 @@ #include <libxml/parser.h> //#################################################################### -namespace { - extern "C" void xml_error (void *, const char*, ...); +extern "C" +{ + static void xml_error (void *, const char*, ...) 
+ { + // don't do anything + } } //#################################################################### int xml::init::ms_counter = 0; @@ -98,8 +102,3 @@ xmlDoValidityCheckingDefaultValue = flag ? 1 : 0; } //#################################################################### -namespace { - extern "C" void xml_error (void*, const char*, ...) - { /* don't do anything */ } -} -//#################################################################### Modified: trunk/src/libxml/tree_parser.cxx =================================================================== --- trunk/src/libxml/tree_parser.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxml/tree_parser.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -67,11 +67,14 @@ //#################################################################### namespace { const char const_default_error[] = "unknown XML parsing error"; +} - extern "C" void cb_tree_error (void *v, const char *message, ...); - extern "C" void cb_tree_warning (void *v, const char *, ...); - extern "C" void cb_tree_ignore (void*, const xmlChar*, int); +extern "C" { + static void cb_tree_error (void *v, const char *message, ...); + static void cb_tree_warning (void *v, const char *, ...); + static void cb_tree_ignore (void*, const xmlChar*, int); } + //#################################################################### struct xml::impl::tree_impl { tree_impl (void) : last_error_(const_default_error), warnings_(false), okay_(false) { @@ -91,7 +94,46 @@ bool warnings_; bool okay_; }; + //#################################################################### +extern "C" +{ +//#################################################################### +static void cb_tree_error (void *v, const char *message, ...) 
{ +try { + + xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(v); + tree_impl *p = static_cast<tree_impl*>(ctxt->_private); + if (!p) return; // handle bug in older versions of libxml + + va_list ap; + va_start(ap, message); + printf2string(p->last_error_, message, ap); + va_end(ap); + + xmlStopParser(ctxt); +} catch (...) { } +} +//#################################################################### +static void cb_tree_warning (void *v, const char *, ...) { +try { + + xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(v); + tree_impl *p = static_cast<tree_impl*>(ctxt->_private); + if (!p) return; // handle bug in older versions of libxml + + p->warnings_ = true; + +} catch (...) { } +} +//#################################################################### +static void cb_tree_ignore (void*, const xmlChar*, int) { +return; +} + +} // extern "C" + +//#################################################################### xml::tree_parser::tree_parser (const char *name, bool allow_exceptions) { std::auto_ptr<tree_impl> ap(pimpl_ = new tree_impl); @@ -162,40 +204,3 @@ const xml::document& xml::tree_parser::get_document (void) const { return pimpl_->doc_; } -//#################################################################### -namespace { - //#################################################################### - extern "C" void cb_tree_error (void *v, const char *message, ...) { - try { - - xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(v); - tree_impl *p = static_cast<tree_impl*>(ctxt->_private); - if (!p) return; // handle bug in older versions of libxml - - va_list ap; - va_start(ap, message); - printf2string(p->last_error_, message, ap); - va_end(ap); - - xmlStopParser(ctxt); - } catch (...) { } - } - //#################################################################### - extern "C" void cb_tree_warning (void *v, const char *, ...) 
{ - try { - - xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(v); - tree_impl *p = static_cast<tree_impl*>(ctxt->_private); - if (!p) return; // handle bug in older versions of libxml - - p->warnings_ = true; - - } catch (...) { } - } - //#################################################################### - extern "C" void cb_tree_ignore (void*, const xmlChar*, int) { - return; - } - //#################################################################### -} -//#################################################################### Modified: trunk/src/libxslt/init.cxx =================================================================== --- trunk/src/libxslt/init.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxslt/init.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -44,16 +44,16 @@ #include <libxslt/xsltutils.h> #include <libexslt/exslt.h> -namespace +extern "C" { -extern "C" void xslt_error(void *, const char*, ...) +static void xslt_error(void *, const char*, ...) { // don't do anything; we install context-specific error handler to // catch errors while applying a stylesheet } -} // anonymous namespace +} // extern "C" int xslt::init::ms_counter = 0; Modified: trunk/src/libxslt/stylesheet.cxx =================================================================== --- trunk/src/libxslt/stylesheet.cxx 2009-06-02 16:20:11 UTC (rev 149) +++ trunk/src/libxslt/stylesheet.cxx 2009-06-02 16:42:51 UTC (rev 150) @@ -121,8 +121,11 @@ } -extern "C" void xsltwrapp_error_cb(void *c, const char *message, ...) +extern "C" { + +static void error_cb(void *c, const char *message, ...) 
+{ xsltTransformContextPtr ctxt = static_cast<xsltTransformContextPtr>(c); xslt::stylesheet::pimpl *impl = static_cast<xslt::stylesheet::pimpl*>(ctxt->_private); @@ -146,6 +149,7 @@ impl->error_.append(formatted); } +} // extern "C" xmlDocPtr apply_stylesheet(xslt::stylesheet::pimpl *impl, xmlDocPtr doc, @@ -159,7 +163,7 @@ xsltTransformContextPtr ctxt = xsltNewTransformContext(style, doc); ctxt->_private = impl; - xsltSetTransformErrorFunc(ctxt, ctxt, xsltwrapp_error_cb); + xsltSetTransformErrorFunc(ctxt, ctxt, error_cb); // clear the error flag before applying the stylesheet impl->errors_occured_ = false; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <tbr...@us...> - 2012-03-19 23:02:56
|
Revision: 195 http://xmlwrapp.svn.sourceforge.net/xmlwrapp/?rev=195&view=rev Author: tbrowder2 Date: 2012-03-19 23:02:47 +0000 (Mon, 19 Mar 2012) Log Message: ----------- change C++ source file suffix from .cxx to .cc Added Paths: ----------- trunk/src/libxml/ait_impl.cc trunk/src/libxml/attributes.cc trunk/src/libxml/document.cc trunk/src/libxml/dtd_impl.cc trunk/src/libxml/event_parser.cc trunk/src/libxml/init.cc trunk/src/libxml/node.cc trunk/src/libxml/node_iterator.cc trunk/src/libxml/node_manip.cc trunk/src/libxml/nodes_view.cc trunk/src/libxml/tree_parser.cc trunk/src/libxml/utility.cc trunk/src/libxslt/init.cc trunk/src/libxslt/stylesheet.cc Removed Paths: ------------- trunk/src/libxml/ait_impl.cxx trunk/src/libxml/attributes.cxx trunk/src/libxml/document.cxx trunk/src/libxml/dtd_impl.cxx trunk/src/libxml/event_parser.cxx trunk/src/libxml/init.cxx trunk/src/libxml/node.cxx trunk/src/libxml/node_iterator.cxx trunk/src/libxml/node_manip.cxx trunk/src/libxml/nodes_view.cxx trunk/src/libxml/tree_parser.cxx trunk/src/libxml/utility.cxx trunk/src/libxslt/init.cxx trunk/src/libxslt/stylesheet.cxx Copied: trunk/src/libxml/ait_impl.cc (from rev 186, trunk/src/libxml/ait_impl.cxx) =================================================================== --- trunk/src/libxml/ait_impl.cc (rev 0) +++ trunk/src/libxml/ait_impl.cc 2012-03-19 23:02:47 UTC (rev 195) @@ -0,0 +1,459 @@ +/* + * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * 3. Neither the name of the Author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// xmlwrapp includes +#include "ait_impl.h" +#include "utility.h" +#include "xmlwrapp/attributes.h" +#include "xmlwrapp/exception.h" + +// standard includes +#include <algorithm> + +// libxml2 includes +#include <libxml/tree.h> + +namespace xml +{ + +using namespace impl; + +// ------------------------------------------------------------------------ +// xml::impl::ait_impl +// ------------------------------------------------------------------------ + +namespace impl +{ + +ait_impl::ait_impl(xmlNodePtr node, xmlAttrPtr prop) + : xmlnode_(node), xmlattr_(prop), fake_(false) +{ + attr_.set_data(xmlnode_, xmlattr_); +} + + +ait_impl::ait_impl(const char *name, const char *value, bool) + : xmlnode_(0), xmlattr_(0), fake_(true) +{ + // in this constructor and in the functions to follow, the last + // parameter, the bool, is only used to create a unique signature + attr_.set_data(name, value, true); +} + + +ait_impl::ait_impl(const ait_impl& other) + : xmlnode_(other.xmlnode_), 
xmlattr_(other.xmlattr_), fake_(other.fake_) +{ + if (fake_) + attr_.set_data(other.attr_.get_name(), other.attr_.get_value(), true); + else + attr_.set_data(xmlnode_, xmlattr_); +} + + +ait_impl& ait_impl::operator=(const ait_impl& other) +{ + ait_impl tmp(other); + + std::swap(xmlnode_, tmp.xmlnode_); + std::swap(xmlattr_, tmp.xmlattr_); + std::swap(fake_, tmp.fake_); + attr_.swap(tmp.attr_); + + return *this; +} + + +attributes::attr* ait_impl::get() +{ + return &attr_; +} + + +xmlAttrPtr ait_impl::get_raw_attr() +{ + return xmlattr_; +} + + +ait_impl& ait_impl::operator++() +{ + if (xmlattr_) + xmlattr_ = xmlattr_->next; + else + fake_ = false; + + attr_.set_data(xmlnode_, xmlattr_); + return *this; +} + + +ait_impl ait_impl::operator++(int) +{ + ait_impl tmp(xmlnode_, xmlattr_); + ++(*this); + return tmp; +} + +} // namespace impl + + +// ------------------------------------------------------------------------ +// xml::attributes::iterator +// ------------------------------------------------------------------------ + +attributes::iterator::iterator() +{ + pimpl_ = new ait_impl(0, 0); +} + + +attributes::iterator::iterator(void *node, void *prop) +{ + pimpl_ = new ait_impl(static_cast<xmlNodePtr>(node), static_cast<xmlAttrPtr>(prop)); +} + + +attributes::iterator::iterator(const char *name, const char *value, bool) +{ + pimpl_ = new ait_impl(name, value, true); +} + + +attributes::iterator::iterator (const iterator &other) +{ + pimpl_ = new ait_impl(*other.pimpl_); +} + + +attributes::iterator& attributes::iterator::operator=(const iterator& other) +{ + iterator tmp(other); + swap(tmp); + return *this; +} + + +void attributes::iterator::swap(iterator& other) +{ + std::swap(pimpl_, other.pimpl_); +} + + +attributes::iterator::~iterator() +{ + delete pimpl_; +} + + +void* attributes::iterator::get_raw_attr() +{ + return pimpl_->get_raw_attr(); +} + + +attributes::iterator::reference attributes::iterator::operator*() const +{ + return *(pimpl_->get()); +} + + 
+attributes::iterator::pointer attributes::iterator::operator->() const +{ + return pimpl_->get(); +} + + +attributes::iterator& attributes::iterator::operator++() +{ + ++(*pimpl_); + return *this; +} + + +attributes::iterator attributes::iterator::operator++(int) +{ + iterator tmp(*this); + ++(*this); + return tmp; +} + + +// ------------------------------------------------------------------------ +// xml::attributes::const_iterator +// ------------------------------------------------------------------------ + +attributes::const_iterator::const_iterator() +{ + pimpl_ = new ait_impl(0, 0); +} + + +attributes::const_iterator::const_iterator(void *node, void *prop) +{ + pimpl_ = new ait_impl(static_cast<xmlNodePtr>(node), static_cast<xmlAttrPtr>(prop)); +} + + +attributes::const_iterator::const_iterator(const char *name, const char *value, bool) +{ + pimpl_ = new ait_impl(name, value, true); +} + + +attributes::const_iterator::const_iterator(const const_iterator& other) +{ + pimpl_ = new ait_impl(*other.pimpl_); +} + + +attributes::const_iterator::const_iterator(const iterator& other) +{ + pimpl_ = new ait_impl(*other.pimpl_); +} + + +attributes::const_iterator& attributes::const_iterator::operator=(const const_iterator& other) +{ + const_iterator tmp(other); + swap(tmp); + return *this; +} + + +void attributes::const_iterator::swap(const_iterator& other) +{ + std::swap(pimpl_, other.pimpl_); +} + + +attributes::const_iterator::~const_iterator() +{ + delete pimpl_; +} + + +void* attributes::const_iterator::get_raw_attr() +{ + return pimpl_->get_raw_attr(); +} + + +attributes::const_iterator::reference attributes::const_iterator::operator*() const +{ + return *(pimpl_->get()); +} + + +attributes::const_iterator::pointer attributes::const_iterator::operator->() const +{ + return pimpl_->get(); +} + + +attributes::const_iterator& attributes::const_iterator::operator++() +{ + ++(*pimpl_); + return *this; +} + + +attributes::const_iterator 
attributes::const_iterator::operator++(int) +{ + const_iterator tmp(*this); + ++(*this); + return tmp; +} + + +// ------------------------------------------------------------------------ +// xml::attributes::attr +// ------------------------------------------------------------------------ + +attributes::attr::attr() : node_(0), prop_(0) +{ +} + + +attributes::attr::attr(const attr& other) + : node_(other.node_), + prop_(other.prop_), + name_(other.name_), + value_(other.value_) +{ +} + + +attributes::attr& attributes::attr::operator=(const attr& other) +{ + attr tmp(other); + swap(tmp); + return *this; +} + + +void attributes::attr::swap(attr& other) +{ + std::swap(node_, other.node_); + std::swap(prop_, other.prop_); + name_.swap(other.name_); + value_.swap(other.value_); +} + + +void attributes::attr::set_data(void *node, void *prop) +{ + node_ = node; + prop_ = prop; + name_.erase(); + value_.erase(); +} + + +void attributes::attr::set_data(const char *name, const char *value, bool) +{ + node_ = 0; + prop_ = 0; + name_ = name; + value_ = value; +} + + +const char* attributes::attr::get_name() const +{ + if (!name_.empty()) + return name_.c_str(); // we were given a name not a node + + if (!node_ || !prop_) + throw xml::exception("access to invalid attributes::attr object!"); + + return reinterpret_cast<const char*>(static_cast<xmlAttrPtr>(prop_)->name); +} + + +const char* attributes::attr::get_value() const +{ + if (!value_.empty()) + return value_.c_str(); // we were given a value, not a node + + if (!node_ || !prop_) + throw xml::exception("access to invalid attributes::attr object!"); + + xmlChar *tmpstr = xmlNodeListGetString(reinterpret_cast<xmlNodePtr>(node_)->doc, reinterpret_cast<xmlAttrPtr>(prop_)->children, 1); + if (tmpstr == 0) + return ""; + + xmlchar_helper helper(tmpstr); + value_.assign(helper.get()); + return value_.c_str(); +} + +// ------------------------------------------------------------------------ +// helper friend functions and 
operators +// ------------------------------------------------------------------------ + +bool operator==(const attributes::iterator& lhs, const attributes::iterator& rhs) +{ + return *(lhs.pimpl_) == *(rhs.pimpl_); +} + +bool operator!=(const attributes::iterator& lhs, const attributes::iterator& rhs) +{ + return !(lhs == rhs); +} + +bool operator==(const attributes::const_iterator& lhs, const attributes::const_iterator& rhs) +{ + return *(lhs.pimpl_) == *(rhs.pimpl_); +} + +bool operator!=(const attributes::const_iterator& lhs, const attributes::const_iterator& rhs) +{ + return !(lhs == rhs); +} + + +namespace impl +{ + +xmlAttrPtr find_prop(xmlNodePtr xmlnode, const char *name) +{ + xmlAttrPtr prop = xmlnode->properties; + + for (; prop; prop = prop->next ) + { + if (xmlStrEqual(prop->name, reinterpret_cast<const xmlChar*>(name))) + return prop; + } + + return 0; +} + + +xmlAttributePtr find_default_prop(xmlNodePtr xmlnode, const char *name) +{ + if (xmlnode->doc != 0) + { + xmlAttributePtr dtd_attr=0; + + if (xmlnode->doc->intSubset != 0) + { + dtd_attr = xmlGetDtdAttrDesc(xmlnode->doc->intSubset, xmlnode->name, reinterpret_cast<const xmlChar*>(name)); + } + + if (dtd_attr == 0 && xmlnode->doc->extSubset != 0) + { + dtd_attr = xmlGetDtdAttrDesc(xmlnode->doc->extSubset, xmlnode->name, reinterpret_cast<const xmlChar*>(name)); + } + + if (dtd_attr != 0 && dtd_attr->defaultValue != 0) + return dtd_attr; + } + + return 0; +} + +bool operator==(const ait_impl& lhs, const ait_impl& rhs) +{ + if (lhs.fake_ || rhs.fake_) + return false; + return lhs.xmlattr_ == rhs.xmlattr_; +} + +bool operator!=(const ait_impl& lhs, const ait_impl& rhs) +{ + return !(lhs == rhs); +} + +} // namespace impl + +} // namespace xml Deleted: trunk/src/libxml/ait_impl.cxx =================================================================== --- trunk/src/libxml/ait_impl.cxx 2012-03-19 19:25:15 UTC (rev 194) +++ trunk/src/libxml/ait_impl.cxx 2012-03-19 23:02:47 UTC (rev 195) @@ -1,459 +0,0 @@ 
-/* - * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of the Author nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -// xmlwrapp includes -#include "ait_impl.h" -#include "utility.h" -#include "xmlwrapp/attributes.h" -#include "xmlwrapp/exception.h" - -// standard includes -#include <algorithm> - -// libxml2 includes -#include <libxml/tree.h> - -namespace xml -{ - -using namespace impl; - -// ------------------------------------------------------------------------ -// xml::impl::ait_impl -// ------------------------------------------------------------------------ - -namespace impl -{ - -ait_impl::ait_impl(xmlNodePtr node, xmlAttrPtr prop) - : xmlnode_(node), xmlattr_(prop), fake_(false) -{ - attr_.set_data(xmlnode_, xmlattr_); -} - - -ait_impl::ait_impl(const char *name, const char *value, bool) - : xmlnode_(0), xmlattr_(0), fake_(true) -{ - // in this constructor and in the functions to follow, the last - // parameter, the bool, is only used to create a unique signature - attr_.set_data(name, value, true); -} - - -ait_impl::ait_impl(const ait_impl& other) - : xmlnode_(other.xmlnode_), xmlattr_(other.xmlattr_), fake_(other.fake_) -{ - if (fake_) - attr_.set_data(other.attr_.get_name(), other.attr_.get_value(), true); - else - attr_.set_data(xmlnode_, xmlattr_); -} - - -ait_impl& ait_impl::operator=(const ait_impl& other) -{ - ait_impl tmp(other); - - std::swap(xmlnode_, tmp.xmlnode_); - std::swap(xmlattr_, tmp.xmlattr_); - std::swap(fake_, tmp.fake_); - attr_.swap(tmp.attr_); - - return *this; -} - - -attributes::attr* ait_impl::get() -{ - return &attr_; -} - - -xmlAttrPtr ait_impl::get_raw_attr() -{ - return xmlattr_; -} - - -ait_impl& ait_impl::operator++() -{ - if (xmlattr_) - xmlattr_ = xmlattr_->next; - else - fake_ = false; - - attr_.set_data(xmlnode_, xmlattr_); - return *this; -} - - -ait_impl ait_impl::operator++(int) -{ - ait_impl tmp(xmlnode_, xmlattr_); - ++(*this); - return tmp; -} - -} // namespace impl - - -// ------------------------------------------------------------------------ -// xml::attributes::iterator -// 
------------------------------------------------------------------------ - -attributes::iterator::iterator() -{ - pimpl_ = new ait_impl(0, 0); -} - - -attributes::iterator::iterator(void *node, void *prop) -{ - pimpl_ = new ait_impl(static_cast<xmlNodePtr>(node), static_cast<xmlAttrPtr>(prop)); -} - - -attributes::iterator::iterator(const char *name, const char *value, bool) -{ - pimpl_ = new ait_impl(name, value, true); -} - - -attributes::iterator::iterator (const iterator &other) -{ - pimpl_ = new ait_impl(*other.pimpl_); -} - - -attributes::iterator& attributes::iterator::operator=(const iterator& other) -{ - iterator tmp(other); - swap(tmp); - return *this; -} - - -void attributes::iterator::swap(iterator& other) -{ - std::swap(pimpl_, other.pimpl_); -} - - -attributes::iterator::~iterator() -{ - delete pimpl_; -} - - -void* attributes::iterator::get_raw_attr() -{ - return pimpl_->get_raw_attr(); -} - - -attributes::iterator::reference attributes::iterator::operator*() const -{ - return *(pimpl_->get()); -} - - -attributes::iterator::pointer attributes::iterator::operator->() const -{ - return pimpl_->get(); -} - - -attributes::iterator& attributes::iterator::operator++() -{ - ++(*pimpl_); - return *this; -} - - -attributes::iterator attributes::iterator::operator++(int) -{ - iterator tmp(*this); - ++(*this); - return tmp; -} - - -// ------------------------------------------------------------------------ -// xml::attributes::const_iterator -// ------------------------------------------------------------------------ - -attributes::const_iterator::const_iterator() -{ - pimpl_ = new ait_impl(0, 0); -} - - -attributes::const_iterator::const_iterator(void *node, void *prop) -{ - pimpl_ = new ait_impl(static_cast<xmlNodePtr>(node), static_cast<xmlAttrPtr>(prop)); -} - - -attributes::const_iterator::const_iterator(const char *name, const char *value, bool) -{ - pimpl_ = new ait_impl(name, value, true); -} - - -attributes::const_iterator::const_iterator(const 
const_iterator& other) -{ - pimpl_ = new ait_impl(*other.pimpl_); -} - - -attributes::const_iterator::const_iterator(const iterator& other) -{ - pimpl_ = new ait_impl(*other.pimpl_); -} - - -attributes::const_iterator& attributes::const_iterator::operator=(const const_iterator& other) -{ - const_iterator tmp(other); - swap(tmp); - return *this; -} - - -void attributes::const_iterator::swap(const_iterator& other) -{ - std::swap(pimpl_, other.pimpl_); -} - - -attributes::const_iterator::~const_iterator() -{ - delete pimpl_; -} - - -void* attributes::const_iterator::get_raw_attr() -{ - return pimpl_->get_raw_attr(); -} - - -attributes::const_iterator::reference attributes::const_iterator::operator*() const -{ - return *(pimpl_->get()); -} - - -attributes::const_iterator::pointer attributes::const_iterator::operator->() const -{ - return pimpl_->get(); -} - - -attributes::const_iterator& attributes::const_iterator::operator++() -{ - ++(*pimpl_); - return *this; -} - - -attributes::const_iterator attributes::const_iterator::operator++(int) -{ - const_iterator tmp(*this); - ++(*this); - return tmp; -} - - -// ------------------------------------------------------------------------ -// xml::attributes::attr -// ------------------------------------------------------------------------ - -attributes::attr::attr() : node_(0), prop_(0) -{ -} - - -attributes::attr::attr(const attr& other) - : node_(other.node_), - prop_(other.prop_), - name_(other.name_), - value_(other.value_) -{ -} - - -attributes::attr& attributes::attr::operator=(const attr& other) -{ - attr tmp(other); - swap(tmp); - return *this; -} - - -void attributes::attr::swap(attr& other) -{ - std::swap(node_, other.node_); - std::swap(prop_, other.prop_); - name_.swap(other.name_); - value_.swap(other.value_); -} - - -void attributes::attr::set_data(void *node, void *prop) -{ - node_ = node; - prop_ = prop; - name_.erase(); - value_.erase(); -} - - -void attributes::attr::set_data(const char *name, const char 
*value, bool) -{ - node_ = 0; - prop_ = 0; - name_ = name; - value_ = value; -} - - -const char* attributes::attr::get_name() const -{ - if (!name_.empty()) - return name_.c_str(); // we were given a name not a node - - if (!node_ || !prop_) - throw xml::exception("access to invalid attributes::attr object!"); - - return reinterpret_cast<const char*>(static_cast<xmlAttrPtr>(prop_)->name); -} - - -const char* attributes::attr::get_value() const -{ - if (!value_.empty()) - return value_.c_str(); // we were given a value, not a node - - if (!node_ || !prop_) - throw xml::exception("access to invalid attributes::attr object!"); - - xmlChar *tmpstr = xmlNodeListGetString(reinterpret_cast<xmlNodePtr>(node_)->doc, reinterpret_cast<xmlAttrPtr>(prop_)->children, 1); - if (tmpstr == 0) - return ""; - - xmlchar_helper helper(tmpstr); - value_.assign(helper.get()); - return value_.c_str(); -} - -// ------------------------------------------------------------------------ -// helper friend functions and operators -// ------------------------------------------------------------------------ - -bool operator==(const attributes::iterator& lhs, const attributes::iterator& rhs) -{ - return *(lhs.pimpl_) == *(rhs.pimpl_); -} - -bool operator!=(const attributes::iterator& lhs, const attributes::iterator& rhs) -{ - return !(lhs == rhs); -} - -bool operator==(const attributes::const_iterator& lhs, const attributes::const_iterator& rhs) -{ - return *(lhs.pimpl_) == *(rhs.pimpl_); -} - -bool operator!=(const attributes::const_iterator& lhs, const attributes::const_iterator& rhs) -{ - return !(lhs == rhs); -} - - -namespace impl -{ - -xmlAttrPtr find_prop(xmlNodePtr xmlnode, const char *name) -{ - xmlAttrPtr prop = xmlnode->properties; - - for (; prop; prop = prop->next ) - { - if (xmlStrEqual(prop->name, reinterpret_cast<const xmlChar*>(name))) - return prop; - } - - return 0; -} - - -xmlAttributePtr find_default_prop(xmlNodePtr xmlnode, const char *name) -{ - if (xmlnode->doc != 0) - { - 
xmlAttributePtr dtd_attr=0; - - if (xmlnode->doc->intSubset != 0) - { - dtd_attr = xmlGetDtdAttrDesc(xmlnode->doc->intSubset, xmlnode->name, reinterpret_cast<const xmlChar*>(name)); - } - - if (dtd_attr == 0 && xmlnode->doc->extSubset != 0) - { - dtd_attr = xmlGetDtdAttrDesc(xmlnode->doc->extSubset, xmlnode->name, reinterpret_cast<const xmlChar*>(name)); - } - - if (dtd_attr != 0 && dtd_attr->defaultValue != 0) - return dtd_attr; - } - - return 0; -} - -bool operator==(const ait_impl& lhs, const ait_impl& rhs) -{ - if (lhs.fake_ || rhs.fake_) - return false; - return lhs.xmlattr_ == rhs.xmlattr_; -} - -bool operator!=(const ait_impl& lhs, const ait_impl& rhs) -{ - return !(lhs == rhs); -} - -} // namespace impl - -} // namespace xml Copied: trunk/src/libxml/attributes.cc (from rev 186, trunk/src/libxml/attributes.cxx) =================================================================== --- trunk/src/libxml/attributes.cc (rev 0) +++ trunk/src/libxml/attributes.cc 2012-03-19 23:02:47 UTC (rev 195) @@ -0,0 +1,266 @@ +/* + * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the Author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// xmlwrapp includes +#include "xmlwrapp/attributes.h" +#include "ait_impl.h" +#include "pimpl_base.h" + +// standard includes +#include <new> +#include <algorithm> + +// libxml2 includes +#include <libxml/tree.h> + +namespace xml +{ + +using namespace xml::impl; + +// ------------------------------------------------------------------------ +// xml::attributes::pimpl +// ------------------------------------------------------------------------ + +struct attributes::pimpl : public pimpl_base<attributes::pimpl> +{ + pimpl() : owner_(true) + { + xmlnode_ = xmlNewNode(0, reinterpret_cast<const xmlChar*>("blank")); + if (!xmlnode_) + throw std::bad_alloc(); + } + + pimpl(xmlNodePtr node) : xmlnode_(node), owner_(false) {} + + pimpl(const pimpl& other) : owner_(true) + { + xmlnode_ = xmlNewNode(0, reinterpret_cast<const xmlChar*>("blank")); + if (!xmlnode_) + throw std::bad_alloc(); + + xmlAttrPtr i=other.xmlnode_->properties; + xmlAttrPtr copy; + + // work around bug in libxml + for ( ; i != 0; i = i->next ) + { + if ( (copy = xmlCopyProp(0, i)) == 0) + { + xmlFreeNode(xmlnode_); + throw std::bad_alloc(); + } + + copy->prev = 0; + copy->next = 0; + xmlAddChild(xmlnode_, 
reinterpret_cast<xmlNodePtr>(copy)); + } + } + + ~pimpl() + { + release(); + } + + void release() + { + if (owner_ && xmlnode_) + xmlFreeNode(xmlnode_); + } + + xmlNodePtr xmlnode_; + bool owner_; +}; + + +// ------------------------------------------------------------------------ +// xml::attributes +// ------------------------------------------------------------------------ + +attributes::attributes() +{ + pimpl_ = new pimpl; +} + + +attributes::attributes(int) +{ + pimpl_ = new pimpl(0); +} + + +attributes::attributes(const attributes& other) +{ + pimpl_ = new pimpl(*other.pimpl_); +} + + +attributes& attributes::operator=(const attributes& other) +{ + attributes tmp(other); + swap(tmp); + return *this; +} + + +void attributes::swap(attributes& other) +{ + std::swap(pimpl_, other.pimpl_); +} + + +attributes::~attributes() +{ + delete pimpl_; +} + + +void* attributes::get_data() +{ + return pimpl_->xmlnode_; +} + + +void attributes::set_data(void *node) +{ + xmlNodePtr x = static_cast<xmlNodePtr>(node); + + pimpl_->release(); + pimpl_->owner_ = false; + pimpl_->xmlnode_ = x; +} + + +attributes::iterator attributes::begin() +{ + return iterator(pimpl_->xmlnode_, pimpl_->xmlnode_->properties); +} + + +attributes::const_iterator attributes::begin() const +{ + return const_iterator(pimpl_->xmlnode_, pimpl_->xmlnode_->properties); +} + + +attributes::iterator attributes::end() +{ + return iterator(); +} + + +attributes::const_iterator attributes::end() const +{ + return const_iterator(); +} + + +void attributes::insert(const char *name, const char *value) +{ + xmlSetProp(pimpl_->xmlnode_, + reinterpret_cast<const xmlChar*>(name), + reinterpret_cast<const xmlChar*>(value)); +} + + +attributes::iterator attributes::find(const char *name) +{ + xmlAttrPtr prop = find_prop(pimpl_->xmlnode_, name); + if ( prop != 0 ) + return iterator(pimpl_->xmlnode_, prop); + + xmlAttributePtr dtd_prop = find_default_prop(pimpl_->xmlnode_, name); + if ( dtd_prop != 0 ) + return 
iterator(name, reinterpret_cast<const char*>(dtd_prop->defaultValue), true); + + return iterator(); +} + + +attributes::const_iterator attributes::find(const char *name) const +{ + xmlAttrPtr prop = find_prop(pimpl_->xmlnode_, name); + if (prop != 0) + return const_iterator(pimpl_->xmlnode_, prop); + + xmlAttributePtr dtd_prop = find_default_prop(pimpl_->xmlnode_, name); + + if (dtd_prop != 0) + { + return const_iterator(name, reinterpret_cast<const char*>(dtd_prop->defaultValue), true); + } + + return const_iterator(); +} + + +attributes::iterator attributes::erase (iterator to_erase) +{ + xmlNodePtr prop = static_cast<xmlNodePtr>(to_erase.get_raw_attr()); + if (prop == 0) + return iterator(); // handle fake and bad iterators + ++to_erase; + + xmlUnlinkNode(prop); + xmlFreeNode(prop); + + return to_erase; +} + + +void attributes::erase(const char *name) +{ + xmlUnsetProp(pimpl_->xmlnode_, reinterpret_cast<const xmlChar*>(name)); +} + + +bool attributes::empty() const +{ + return pimpl_->xmlnode_->properties == 0; +} + + +attributes::size_type attributes::size() const +{ + size_type count = 0; + + xmlAttrPtr prop = pimpl_->xmlnode_->properties; + while (prop != 0) + { + ++count; + prop = prop->next; + } + + return count; +} + +} // namespace xml Deleted: trunk/src/libxml/attributes.cxx =================================================================== --- trunk/src/libxml/attributes.cxx 2012-03-19 19:25:15 UTC (rev 194) +++ trunk/src/libxml/attributes.cxx 2012-03-19 23:02:47 UTC (rev 195) @@ -1,266 +0,0 @@ -/* - * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of the Author nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -// xmlwrapp includes -#include "xmlwrapp/attributes.h" -#include "ait_impl.h" -#include "pimpl_base.h" - -// standard includes -#include <new> -#include <algorithm> - -// libxml2 includes -#include <libxml/tree.h> - -namespace xml -{ - -using namespace xml::impl; - -// ------------------------------------------------------------------------ -// xml::attributes::pimpl -// ------------------------------------------------------------------------ - -struct attributes::pimpl : public pimpl_base<attributes::pimpl> -{ - pimpl() : owner_(true) - { - xmlnode_ = xmlNewNode(0, reinterpret_cast<const xmlChar*>("blank")); - if (!xmlnode_) - throw std::bad_alloc(); - } - - pimpl(xmlNodePtr node) : xmlnode_(node), owner_(false) {} - - pimpl(const pimpl& other) : owner_(true) - { - xmlnode_ = xmlNewNode(0, reinterpret_cast<const xmlChar*>("blank")); - if (!xmlnode_) - throw std::bad_alloc(); - - xmlAttrPtr i=other.xmlnode_->properties; - xmlAttrPtr copy; - - // work around bug in libxml - for ( ; i != 0; i = i->next ) - { - if ( (copy = xmlCopyProp(0, i)) == 0) - { - xmlFreeNode(xmlnode_); - throw std::bad_alloc(); - } - - copy->prev = 0; - copy->next = 0; - xmlAddChild(xmlnode_, reinterpret_cast<xmlNodePtr>(copy)); - } - } - - ~pimpl() - { - release(); - } - - void release() - { - if (owner_ && xmlnode_) - xmlFreeNode(xmlnode_); - } - - xmlNodePtr xmlnode_; - bool owner_; -}; - - -// ------------------------------------------------------------------------ -// xml::attributes -// ------------------------------------------------------------------------ - -attributes::attributes() -{ - pimpl_ = new pimpl; -} - - -attributes::attributes(int) -{ - pimpl_ = new pimpl(0); -} - - -attributes::attributes(const attributes& other) -{ - pimpl_ = new pimpl(*other.pimpl_); -} - - -attributes& attributes::operator=(const attributes& other) -{ - attributes tmp(other); - swap(tmp); - return *this; -} - - -void attributes::swap(attributes& other) -{ - std::swap(pimpl_, other.pimpl_); -} - 
- -attributes::~attributes() -{ - delete pimpl_; -} - - -void* attributes::get_data() -{ - return pimpl_->xmlnode_; -} - - -void attributes::set_data(void *node) -{ - xmlNodePtr x = static_cast<xmlNodePtr>(node); - - pimpl_->release(); - pimpl_->owner_ = false; - pimpl_->xmlnode_ = x; -} - - -attributes::iterator attributes::begin() -{ - return iterator(pimpl_->xmlnode_, pimpl_->xmlnode_->properties); -} - - -attributes::const_iterator attributes::begin() const -{ - return const_iterator(pimpl_->xmlnode_, pimpl_->xmlnode_->properties); -} - - -attributes::iterator attributes::end() -{ - return iterator(); -} - - -attributes::const_iterator attributes::end() const -{ - return const_iterator(); -} - - -void attributes::insert(const char *name, const char *value) -{ - xmlSetProp(pimpl_->xmlnode_, - reinterpret_cast<const xmlChar*>(name), - reinterpret_cast<const xmlChar*>(value)); -} - - -attributes::iterator attributes::find(const char *name) -{ - xmlAttrPtr prop = find_prop(pimpl_->xmlnode_, name); - if ( prop != 0 ) - return iterator(pimpl_->xmlnode_, prop); - - xmlAttributePtr dtd_prop = find_default_prop(pimpl_->xmlnode_, name); - if ( dtd_prop != 0 ) - return iterator(name, reinterpret_cast<const char*>(dtd_prop->defaultValue), true); - - return iterator(); -} - - -attributes::const_iterator attributes::find(const char *name) const -{ - xmlAttrPtr prop = find_prop(pimpl_->xmlnode_, name); - if (prop != 0) - return const_iterator(pimpl_->xmlnode_, prop); - - xmlAttributePtr dtd_prop = find_default_prop(pimpl_->xmlnode_, name); - - if (dtd_prop != 0) - { - return const_iterator(name, reinterpret_cast<const char*>(dtd_prop->defaultValue), true); - } - - return const_iterator(); -} - - -attributes::iterator attributes::erase (iterator to_erase) -{ - xmlNodePtr prop = static_cast<xmlNodePtr>(to_erase.get_raw_attr()); - if (prop == 0) - return iterator(); // handle fake and bad iterators - ++to_erase; - - xmlUnlinkNode(prop); - xmlFreeNode(prop); - - return to_erase; 
-} - - -void attributes::erase(const char *name) -{ - xmlUnsetProp(pimpl_->xmlnode_, reinterpret_cast<const xmlChar*>(name)); -} - - -bool attributes::empty() const -{ - return pimpl_->xmlnode_->properties == 0; -} - - -attributes::size_type attributes::size() const -{ - size_type count = 0; - - xmlAttrPtr prop = pimpl_->xmlnode_->properties; - while (prop != 0) - { - ++count; - prop = prop->next; - } - - return count; -} - -} // namespace xml Copied: trunk/src/libxml/document.cc (from rev 186, trunk/src/libxml/document.cxx) =================================================================== --- trunk/src/libxml/document.cc (rev 0) +++ trunk/src/libxml/document.cc 2012-03-19 23:02:47 UTC (rev 195) @@ -0,0 +1,513 @@ +/* + * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) + * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the Author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +// xmlwrapp includes +#include "xmlwrapp/document.h" +#include "xmlwrapp/node.h" +#include "xmlwrapp/exception.h" + +#include "utility.h" +#include "dtd_impl.h" +#include "node_manip.h" + +// standard includes +#include <new> +#include <memory> +#include <iterator> +#include <iostream> +#include <algorithm> +#include <stdexcept> + +// libxml includes +#include <libxml/tree.h> +#include <libxml/xinclude.h> + +// bring in private libxslt stuff (see bug #1927398) +#include "../libxslt/result.h" + +namespace xml +{ + +using namespace impl; + +namespace +{ + const char DEFAULT_ENCODING[] = "ISO-8859-1"; +} + +// ------------------------------------------------------------------------ +// xml::impl::doc_impl +// ------------------------------------------------------------------------ + +namespace impl +{ + +struct doc_impl +{ + doc_impl() + : doc_(0), xslt_result_(0) + { + xmlDocPtr tmpdoc; + if ( (tmpdoc = xmlNewDoc(0)) == 0) + throw std::bad_alloc(); + set_doc_data(tmpdoc, true); + } + + + doc_impl(const char *root_name) + : doc_(0), xslt_result_(0), root_(root_name) + { + xmlDocPtr tmpdoc; + if ( (tmpdoc = xmlNewDoc(0)) == 0) + throw std::bad_alloc(); + set_doc_data(tmpdoc, true); + } + + + doc_impl(const doc_impl& other) + : doc_(0), xslt_result_(0) + { + xmlDocPtr tmpdoc; + if ( (tmpdoc = xmlCopyDoc(other.doc_, 1)) == 0) + throw std::bad_alloc(); + set_doc_data(tmpdoc, false); + } + + + void set_doc_data(xmlDocPtr newdoc, bool 
root_is_okay) + { + if (doc_) + xmlFreeDoc(doc_); + doc_ = newdoc; + + if (doc_->version) + version_ = reinterpret_cast<const char*>(doc_->version); + if (doc_->encoding) + encoding_ = reinterpret_cast<const char*>(doc_->encoding); + + if (root_is_okay) + { + xmlDocSetRootElement(doc_, static_cast<xmlNodePtr>(root_.release_node_data())); + } + else + { + xmlNodePtr libxml_root_node = xmlDocGetRootElement(doc_); + + if (libxml_root_node) + { + root_.set_node_data(libxml_root_node); + } + else + { + node tmpnode; + root_.swap(tmpnode); + + xmlDocSetRootElement(doc_, static_cast<xmlNodePtr>(root_.release_node_data())); + } + } + } + + + void set_root_node(const node& n) + { + node &non_const_node = const_cast<node&>(n); + xmlNodePtr new_root_node = xmlCopyNode(static_cast<xmlNodePtr>(non_const_node.get_node_data()), 1); + if (!new_root_node) + throw std::bad_alloc(); + + xmlNodePtr old_root_node = xmlDocSetRootElement(doc_, new_root_node); + root_.set_node_data(new_root_node); + if (old_root_node) + xmlFreeNode(old_root_node); + + xslt_result_ = 0; + } + + + ~doc_impl() + { + if (doc_) + xmlFreeDoc(doc_); + delete xslt_result_; + } + + xmlDocPtr doc_; + xslt::impl::result *xslt_result_; + node root_; + std::string version_; + mutable std::string encoding_; +}; + +} // namespace impl + + +// ------------------------------------------------------------------------ +// xml::document +// ------------------------------------------------------------------------ + +document::document() +{ + pimpl_ = new doc_impl; +} + + +document::document(const char *root_name) +{ + pimpl_ = new doc_impl(root_name); +} + + +document::document(const node& n) +{ + std::auto_ptr<doc_impl> ap(pimpl_ = new doc_impl); + pimpl_->set_root_node(n); + ap.release(); +} + + +document::document(const document& other) +{ + pimpl_ = new doc_impl(*(other.pimpl_)); +} + + +document& document::operator=(const document& other) +{ + document tmp(other); + swap(tmp); + return *this; +} + + +void 
document::swap(document& other) +{ + std::swap(pimpl_, other.pimpl_); +} + + +document::~document() +{ + delete pimpl_; +} + + +const node& document::get_root_node() const +{ + return pimpl_->root_; +} + + +node& document::get_root_node() +{ + return pimpl_->root_; +} + + +void document::set_root_node(const node& n) +{ + pimpl_->set_root_node(n); +} + + +const std::string& document::get_version() const +{ + return pimpl_->version_; +} + + +void document::set_version(const char *version) +{ + const xmlChar *old_version = pimpl_->doc_->version; + if ( (pimpl_->doc_->version = xmlStrdup(reinterpret_cast<const xmlChar*>(version))) == 0) + throw std::bad_alloc(); + + pimpl_->version_ = version; + if (old_version) + xmlFree(const_cast<char*>(reinterpret_cast<const char*>(old_version))); +} + + +const std::string& document::get_encoding() const +{ + if (pimpl_->encoding_.empty()) + pimpl_->encoding_ = DEFAULT_ENCODING; + return pimpl_->encoding_; +} + + +void document::set_encoding(const char *encoding) +{ + pimpl_->encoding_ = encoding; + + if (pimpl_->doc_->encoding) + xmlFree(const_cast<xmlChar*>(pimpl_->doc_->encoding)); + + pimpl_->doc_->encoding = xmlStrdup(reinterpret_cast<const xmlChar*>(encoding)); + + if (!pimpl_->doc_->encoding) + throw std::bad_alloc(); +} + + +bool document::get_is_standalone() const +{ + return pimpl_->doc_->standalone == 1; +} + + +void document::set_is_standalone(bool sa) +{ + pimpl_->doc_->standalone = sa ? 
1 : 0; +} + + +bool document::process_xinclude() +{ + // xmlXIncludeProcess does not return what is says it does + return xmlXIncludeProcess(pimpl_->doc_) >= 0; +} + + +bool document::has_internal_subset() const +{ + return pimpl_->doc_->intSubset != 0; +} + + +bool document::has_external_subset() const +{ + return pimpl_->doc_->extSubset != 0; +} + + +bool document::validate() +{ + dtd_impl dtd; + return dtd.validate(pimpl_->doc_); +} + + +bool document::validate(const char *dtdname) +{ + dtd_impl dtd(dtdname); + + if (!dtd.error_.empty()) + return false; + if (!dtd.validate(pimpl_->doc_)) + return false; + + // remove the old DTD + if (pimpl_->doc_->extSubset != 0) + xmlFreeDtd(pimpl_->doc_->extSubset); + + pimpl_->doc_->extSubset = dtd.release(); + + return true; +} + + +document::size_type document::size() const +{ + using namespace std; + return distance(begin(), end()); +} + + +node::iterator document::begin() +{ + return node::iterator(pimpl_->doc_->children); +} + + +node::const_iterator document::begin() const +{ + return node::const_iterator(pimpl_->doc_->children); +} + + +node::iterator document::end() +{ + return node::iterator(0); +} + + +node::const_iterator document::end() const +{ + return node::const_iterator(0); +} + + +void document::push_back(const node& child) +{ + if (child.get_type() == node::type_element) + throw xml::exception("xml::document::push_back can't take element type nodes"); + + impl::node_insert + ( + reinterpret_cast<xmlNodePtr>(pimpl_->doc_), + 0, + static_cast<xmlNodePtr>(const_cast<node&>(child).get_node_data()) + ); +} + + +node::iterator document::insert(const node& n) +{ + if (n.get_type() == node::type_element) + throw xml::exception("xml::document::insert can't take element type nodes"); + + return node::iterator(xml::impl::node_insert(reinterpret_cast<xmlNodePtr>(pimpl_->doc_), 0, static_cast<xmlNodePtr>(const_cast<node&>(n).get_node_data()))); +} + + +node::iterator document::insert(node::iterator position, const 
node& n) +{ + if (n.get_type() == node::type_element) + throw xml::exception("xml::document::insert can't take element type nodes"); + + return node::iterator(xml::impl::node_insert(reinterpret_cast<xmlNodePtr>(pimpl_->doc_), static_cast<xmlNodePtr>(position.get_raw_node()), static_cast<xmlNodePtr>(const_cast<node&>(n).get_node_data()))); +} + + +node::iterator document::replace(node::iterator old_node, const node& new_node) +{ + if (old_node->get_type() == node::type_element || new_node.get_type() == node::type_element) + { + throw xml::exception("xml::document::replace can't replace element type nodes"); + } + + return node::iterator(xml::impl::node_replace(static_cast<xmlNodePtr>(old_node.get_raw_node()), static_cast<xmlNodePtr>(const_cast<node&>(new_node).get_node_data()))); +} + + +node::iterator document::erase(node::iterator to_erase) +{ + if (to_erase->get_type() == node::type_element) + throw xml::exception("xml::document::erase can't erase element type nodes"); + return node::iterator(xml::impl::node_erase(static_cast<xmlNodePtr>(to_erase.get_raw_node()))); +} + + +node::iterator document::erase(node::iterator first, node::iterator last) +{ + while (first != last) + first = erase(first); + return first; +} + + +void document::save_to_string(std::string& s) const +{ + xmlChar *xml_string; + int xml_string_length; + + if (pimpl_->xslt_result_ != 0) + { + pimpl_->xslt_result_->save_to_string(s); + return; + } + + const char *enc = pimpl_->encoding_.empty() ? 
0 : pimpl_->encoding_.c_str(); + xmlDocDumpFormatMemoryEnc(pimpl_->doc_, &xml_string, &xml_string_length, enc, 1); + + xmlchar_helper helper(xml_string); + if (xml_string_length) + s.assign(helper.get(), xml_string_length); +} + + +bool document::save_to_file(const char *filename, int compression_level) const +{ + std::swap(pimpl_->doc_->compression, compression_level); + + if (pimpl_->xslt_result_ != 0) + { + bool rc = pimpl_->xslt_result_->save_to_file(filename, compression_level); + std::swap(pimpl_->doc_->compression, compression_level); + + return rc; + } + + const char *enc = pimpl_->encoding_.empty() ? 0 : pimpl_->encoding_.c_str(); + bool rc = xmlSaveFormatFileEnc(filename, pimpl_->doc_, enc, 1) > 0; + std::swap(pimpl_->doc_->compression, compression_level); + + return rc; +} + + +void document::set_doc_data(void *data) +{ + // we own the doc now, don't free it! + pimpl_->set_doc_data(static_cast<xmlDocPtr>(data), false); + pimpl_->xslt_result_ = 0; +} + + +void document::set_doc_data_from_xslt(void *data, xslt::impl::result *xr) +{ + // this document came from a XSLT transformation + pimpl_->set_doc_data(static_cast<xmlDocPtr>(data), false); + pimpl_->xslt_result_ = xr; +} + + +void* document::get_doc_data() +{ + return pimpl_->doc_; +} + + +void* document::get_doc_data_read_only() const +{ + return pimpl_->doc_; +} + + +void* document::release_doc_data() +{ + xmlDocPtr xmldoc = pimpl_->doc_; + pimpl_->doc_ = 0; + + return xmldoc; +} + + +std::ostream& operator<<(std::ostream& stream, const document& doc) +{ + std::string xmldata; + doc.save_to_string(xmldata); + stream << xmldata; + return stream; +} + +} // namespace xml Deleted: trunk/src/libxml/document.cxx =================================================================== --- trunk/src/libxml/document.cxx 2012-03-19 19:25:15 UTC (rev 194) +++ trunk/src/libxml/document.cxx 2012-03-19 23:02:47 UTC (rev 195) @@ -1,513 +0,0 @@ -/* - * Copyright (C) 2001-2003 Peter J Jones (pj...@pm...) 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * 3. Neither the name of the Author nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR - * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -// xmlwrapp includes -#include "xmlwrapp/document.h" -#include "xmlwrapp/node.h" -#include "xmlwrapp/exception.h" - -#include "utility.h" -#include "dtd_impl.h" -#include "node_manip.h" - -// standard includes -#include <new> -#include <memory> -#include <iterator> -#include <iostream> -#include <algorithm> -#include <stdexcept> - -// libxml includes -#include <libxml/tree.h> -#include <libxml/xinclude.h> - -// bring in private libxslt stuff (see bug #1927398) -#include "../libxslt/result.h" - -namespace xml -{ - -using namespace impl; - -namespace -{ - const char DEFAULT_ENCODING[] = "ISO-8859-1"; -} - -// ------------------------------------------------------------------------ -// xml::impl::doc_impl -// ------------------------------------------------------------------------ - -namespace impl -{ - -struct doc_impl -{ - doc_impl() - : doc_(0), xslt_result_(0) - { - xmlDocPtr tmpdoc; - if ( (tmpdoc = xmlNewDoc(0)) == 0) - throw std::bad_alloc(); - set_doc_data(tmpdoc, true); - } - - - doc_impl(const char *root_name) - : doc_(0), xslt_result_(0), root_(root_name) - { - xmlDocPtr tmpdoc; - if ( (tmpdoc = xmlNewDoc(0)) == 0) - throw std::bad_alloc(); - set_doc_data(tmpdoc, true); - } - - - doc_impl(const doc_impl& other) - : doc_(0), xslt_result_(0) - { - xmlDocPtr tmpdoc; - if ( (tmpdoc = xmlCopyDoc(other.doc_, 1)) == 0) - throw std::bad_alloc(); - set_doc_data(tmpdoc, false); - } - - - void set_doc_data(xmlDocPtr newdoc, bool root_is_okay) - { - if (doc_) - xmlFreeDoc(doc_); - doc_ = newdoc; - - if (doc_->version) - version_ = reinterpret_cast<const char*>(doc_->version); - if (doc_->encoding) - encoding_ = reinterpret_cast<const char*>(doc_->encoding); - - if (root_is_okay) - { - xmlDocSetRootElement(doc_, static_cast<xmlNodePtr>(root_.release_node_data())); - } - else - { - xmlNodePtr libxml_root_node = xmlDocGetRootElement(doc_); - - if (libxml_root_node) - { - root_.set_node_data(libxml_root_node); - } - else - { - node tmpnode; - root_.swap(tmpnode); - 
- xmlDocSetRootElement(doc_, static_cast<xmlNodePtr>(root_.release_node_data())); - } - } - } - - - void set_root_node(const node& n) - { - node &non_const_node = const_cast<node&>(n); - xmlNodePtr new_root_node = xmlCopyNode(static_cast<xmlNodePtr>(non_const_node.get_node_data()), 1); - if (!new_root_node) - throw std::bad_alloc(); - - xmlNodePtr old_root_node = xmlDocSetRootElement(doc_, new_root_node); - root_.set_node_data(new_root_node); - if (old_root_node) - xmlFreeNode(old_root_node); - - xslt_result_ = 0; - } - - - ~doc_impl() - { - if (doc_) - xmlFreeDoc(doc_); - delete xslt_result_; - } - - xmlDocPtr doc_; - xslt::impl::result *xslt_result_; - node root_; - std::string version_; - mutable std::string encoding_; -}; - -} // namespace impl - - -// ------------------------------------------------------------------------ -// xml::document -// ------------------------------------------------------------------------ - -document::document() -{ - pimpl_ = new doc_impl; -} - - -document::document(const char *root_name) -{ - pimpl_ = new doc_impl(root_name); -} - - -document::document(const node& n) -{ - std::auto_ptr<doc_impl> ap(pimpl_ = new doc_impl); - pimpl_->set_root_node(n); - ap.release(); -} - - -document::document(const document& other) -{ - pimpl_ = new doc_impl(*(other.pimpl_)); -} - - -document& document::operator=(const document& other) -{ - document tmp(other); - swap(tmp); - return *this; -} - - -void document::swap(document& other) -{ - std::swap(pimpl_, other.pimpl_); -} - - -document::~document() -{ - delete pimpl_; -} - - -const node& document::get_root_node() const -{ - return pimpl_->root_; -} - - -node& document::get_root_node() -{ - return pimpl_->root_; -} - - -void document::set_root_node(const node& n) -{ - pimpl_->set_root_node(n); -} - - -const std::string& document::get_version() const -{ - return pimpl_->version_; -} - - -void document::set_version(const char *version) -{ - const xmlChar *old_version = pimpl_->doc_->version; - if ( 
(pimpl_->doc_->version = xmlStrdup(reinterpret_cast<const xmlChar*>(version))) == 0) - throw std::bad_alloc(); - - pimpl_->version_ = version; - if (old_version) - xmlFree(const_cast<char*>(reinterpret_cast<const char*>(old_version))); -} - - -const std::string& document::get_encoding() const -{ - if (pimpl_->encoding_.empty()) - pimpl_->encoding_ = DEFAULT_ENCODING; - return pimpl_->encoding_; -} - - -void document::set_encoding(const char *encoding) -{ - pimpl_->encoding_ = encoding; - - if (pimpl_->doc_->encoding) - xmlFree(const_cast<xmlChar*>(pimpl_->doc_->encoding)); - - pimpl_->doc_->encoding = xmlStrdup(reinterpret_cast<const xmlChar*>(encoding)); - - if (!pimpl_->doc_->encoding) - throw std::bad_alloc(); -} - - -bool document::get_is_standalone() const -{ - return pimpl_->doc_->standalone == 1; -} - - -void document::set_is_standalone(bool sa) -{ - pimpl_->doc_->standalone = sa ? 1 : 0; -} - - -bool document::process_xinclude() -{ - // xmlXIncludeProcess does not return what is says it does - return xmlXIncludeProcess(pimpl_->doc_) >= 0; -} - - -bool document::has_internal_subset() const -{ - return pimpl_->doc_->intSubset != 0; -} - - -bool document::has_external_subset() const -{ - return pimpl_->doc_->extSubset != 0; -} - - -bool document::validate() -{ - dtd_impl dtd; - return dtd.validate(pimpl_->doc_); -} - - -bool document::validate(const char *dtdname) -{ - dtd_impl dtd(dtdname); - - if (!dtd.error_.empty()) - return false; - if (!dtd.validate(pimpl_->doc_)) - return false; - - // remove the old DTD - if (pimpl_->doc_->extSubset != 0) - xmlFreeDtd(pimpl_->doc_->extSubset); - - pimpl_->doc_->extSubset = dtd.release(); - - return true; -} - - -document::size_type document::size() const -{ - using namespace std; - return distance(begin(), end()); -} - - -node::iterator document::begin() -{ - return node::iterator(pimpl_->doc_->children); -} - - -node::const_iterator document::begin() const -{ - return node::const_iterator(pimpl_->doc_->children); -} - 
- -node::iterator document::end() -{ - return node::iterator(0); -} - - -node::const_iterator document::end() const -{ - return node::const_iterator(0); -} - - -void document::push_back(const node& child) -{ - if (child.get_type() == node::type_element) - throw xml::exception("xml::document::push_back can't take element type nodes"); - - impl::node_insert - ( - reinterpret_cast<xmlNodePtr>(pimpl_->doc_), - 0, - static_cast<xmlNodePtr>(const_cast<node&>(child).get_node_data()) - ); -} - - -node::iterator document::insert(const node& n) -{ - if (n.get_type() == node::type_element) - throw xml::exception("xml::document::insert can't take element type nodes"); - - return node::iterator(xml::impl::node_insert(reinterpret_cast<xmlNodePtr>(pimpl_->doc_), 0, static_cast<xmlNodePtr>(const_cast<node&>(n).get_node_data()))); -} - - -node::iterator document::insert(node::iterator position, const node& n) -{ - if (n.get_type() == node::type_element) - throw xml::exception("xml::document::insert can't take element type nodes"); - - return node::iterator(xml::impl::node_insert(reinterpret_cast<xmlNodePtr>(pimpl_->doc_), static_cast<xmlNodePtr>(position.get_raw_node()), static_cast<xmlNodePtr>(const_cast<node&>(n).get_node_data()))); -} - - -node::iterator document::replace(node::iterator old_node, const node& new_node) -{ - if (old_node->get_type() == node::type_element || new_node.get_type() == node::type_element) - { - throw xml::exception("xml::document::replace can't replace element type nodes"); - } - - return node::iterator(xml::impl::node_replace(static_cast<xmlNodePtr>(old_node.get_raw_node()), static_cast<xmlNodePtr>(const_cast<node&>(new_node).get_node_data()))); -} - - -node::iterator document::erase(node::iterator to_erase) -{ - if (to_erase->get_type() == node::type_element) - throw xml::exception("xml::document::erase can't erase element type nodes"); - return node::iterator(xml::impl::node_erase(static_cast<xmlNodePtr>(to_erase.get_raw_node()))); -} - - -n... 
[truncated message content] |
From: <tbr...@us...> - 2012-03-20 13:35:22
|
Revision: 200 http://xmlwrapp.svn.sourceforge.net/xmlwrapp/?rev=200&view=rev Author: tbrowder2 Date: 2012-03-20 13:35:13 +0000 (Tue, 20 Mar 2012) Log Message: ----------- add ustring source from glibmm project Modified Paths: -------------- trunk/src/Makefile.am Added Paths: ----------- trunk/src/libustring/ trunk/src/libustring/ustring.cc trunk/src/libustring/ustring.h Modified: trunk/src/Makefile.am =================================================================== --- trunk/src/Makefile.am 2012-03-20 13:21:45 UTC (rev 199) +++ trunk/src/Makefile.am 2012-03-20 13:35:13 UTC (rev 200) @@ -2,16 +2,24 @@ AM_CPPFLAGS = -I$(top_srcdir)/include $(CXXFLAGS_VISIBILITY) if WITH_XSLT -lib_LTLIBRARIES = libxmlwrapp.la libxsltwrapp.la +lib_LTLIBRARIES = libxmlwrapp.la libxsltwrapp.la libustring.la else -lib_LTLIBRARIES = libxmlwrapp.la +lib_LTLIBRARIES = libxmlwrapp.la libustring.la endif +libustring_la_CPPFLAGS = $(AM_CPPFLAGS) $(LIBXML_CFLAGS) +libustring_la_LIBADD = $(LIBXML_LIBS) +libustring_la_LDFLAGS = -version-info 6:0:1 -no-undefined + +libustring_la_SOURCES = \ + libustring/ustring.cc \ + libustring/ustring.h \ + libxmlwrapp_la_CPPFLAGS = $(AM_CPPFLAGS) $(LIBXML_CFLAGS) -libxmlwrapp_la_LIBADD = $(LIBXML_LIBS) -libxmlwrapp_la_LDFLAGS = -version-info 6:0:1 -no-undefined +libxmlwrapp_la_LIBADD = $(LIBXML_LIBS) +libxmlwrapp_la_LDFLAGS = -version-info 6:0:1 -no-undefined -libxmlwrapp_la_SOURCES = \ +libxmlwrapp_la_SOURCES = \ libxml/ait_impl.cc \ libxml/ait_impl.h \ libxml/attributes.cc \ Added: trunk/src/libustring/ustring.cc =================================================================== --- trunk/src/libustring/ustring.cc (rev 0) +++ trunk/src/libustring/ustring.cc 2012-03-20 13:35:13 UTC (rev 200) @@ -0,0 +1,1418 @@ +// -*- c++ -*- +/* $Id$ */ + +/* Copyright (C) 2002 The gtkmm Development Team + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free 
Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <glibmmconfig.h> +#include <glibmm/ustring.h> +#include <glibmm/convert.h> +#include <glibmm/error.h> +#include <glibmm/utility.h> + +#include <algorithm> +#include <iostream> +#include <cstring> +# include <stdexcept> +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +namespace +{ + +using Glib::ustring; + +// Little helper to make the conversion from gunichar to UTF-8 a one-liner. +// +struct UnicharToUtf8 +{ + char buf[6]; + ustring::size_type len; + + explicit UnicharToUtf8(gunichar uc) + : len (g_unichar_to_utf8(uc, buf)) {} +}; + + +// All utf8_*_offset() functions return npos if offset is out of range. +// The caller should decide if npos is a valid argument and just marks +// the whole string, or if it is not allowed (e.g. for start positions). +// In the latter case std::out_of_range should be thrown, but usually +// std::string will do that for us. + +// First overload: stop on '\0' character. +static +ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset) +{ + if(offset == ustring::npos) + return ustring::npos; + + const char *const utf8_skip = g_utf8_skip; + const char* p = str; + + for(; offset != 0; --offset) + { + const unsigned int c = static_cast<unsigned char>(*p); + + if(c == 0) + return ustring::npos; + + p += utf8_skip[c]; + } + + return (p - str); +} + +// Second overload: stop when reaching maxlen. 
+static +ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset, + ustring::size_type maxlen) +{ + if(offset == ustring::npos) + return ustring::npos; + + const char *const utf8_skip = g_utf8_skip; + const char *const pend = str + maxlen; + const char* p = str; + + for(; offset != 0; --offset) + { + if(p >= pend) + return ustring::npos; + + p += utf8_skip[static_cast<unsigned char>(*p)]; + } + + return (p - str); +} + +// Third overload: stop when reaching str.size(). +// +inline +ustring::size_type utf8_byte_offset(const std::string& str, ustring::size_type offset) +{ + return utf8_byte_offset(str.data(), offset, str.size()); +} + +// Takes UTF-8 character offset and count in ci and cn. +// Returns the byte offset and count in i and n. +// +struct Utf8SubstrBounds +{ + ustring::size_type i; + ustring::size_type n; + + Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn) + : + i (utf8_byte_offset(str, ci)), + n (ustring::npos) + { + if(i != ustring::npos) + n = utf8_byte_offset(str.data() + i, cn, str.size() - i); + } +}; + +// Converts byte offset to UTF-8 character offset. +inline +ustring::size_type utf8_char_offset(const std::string& str, ustring::size_type offset) +{ + if(offset == ustring::npos) + return ustring::npos; + + const char *const pdata = str.data(); + return g_utf8_pointer_to_offset(pdata, pdata + offset); +} + + +// Helper to implement ustring::find_first_of() and find_first_not_of(). +// Returns the UTF-8 character offset, or ustring::npos if not found. 
+static +ustring::size_type utf8_find_first_of(const std::string& str, ustring::size_type offset, + const char* utf8_match, long utf8_match_size, + bool find_not_of) +{ + const ustring::size_type byte_offset = utf8_byte_offset(str, offset); + if(byte_offset == ustring::npos) + return ustring::npos; + + long ucs4_match_size = 0; + const Glib::ScopedPtr<gunichar> ucs4_match + (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size)); + + const gunichar *const match_begin = ucs4_match.get(); + const gunichar *const match_end = match_begin + ucs4_match_size; + + const char *const str_begin = str.data(); + const char *const str_end = str_begin + str.size(); + + for(const char* pstr = str_begin + byte_offset; + pstr < str_end; + pstr = g_utf8_next_char(pstr)) + { + const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr)); + + if((pfound != match_end) != find_not_of) + return offset; + + ++offset; + } + + return ustring::npos; +} + +// Helper to implement ustring::find_last_of() and find_last_not_of(). +// Returns the UTF-8 character offset, or ustring::npos if not found. +static +ustring::size_type utf8_find_last_of(const std::string& str, ustring::size_type offset, + const char* utf8_match, long utf8_match_size, + bool find_not_of) +{ + long ucs4_match_size = 0; + const Glib::ScopedPtr<gunichar> ucs4_match + (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size)); + + const gunichar *const match_begin = ucs4_match.get(); + const gunichar *const match_end = match_begin + ucs4_match_size; + + const char *const str_begin = str.data(); + const char* pstr = str_begin; + + // Set pstr one byte beyond the actual start position. + const ustring::size_type byte_offset = utf8_byte_offset(str, offset); + pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size(); + + while(pstr > str_begin) + { + // Move to previous character. 
+ do + --pstr; + while((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80); + + const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr)); + + if((pfound != match_end) != find_not_of) + return g_utf8_pointer_to_offset(str_begin, pstr); + } + + return ustring::npos; +} + +} // anonymous namespace + + +namespace Glib +{ + +#ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS +// Initialize static member here, +// because the compiler did not allow us do it inline. +const ustring::size_type ustring::npos = std::string::npos; +#endif + +/* + * We need our own version of g_utf8_get_char(), because the std::string + * iterator is not necessarily a plain pointer (it's in fact not in GCC's + * libstdc++-v3). Copying the UTF-8 data into a temporary buffer isn't an + * option since this operation is quite time critical. The implementation + * is quite different from g_utf8_get_char() -- both more generic and likely + * faster. + * + * By looking at the first byte of a UTF-8 character one can determine the + * number of bytes used. GLib offers the g_utf8_skip[] array for this purpose, + * but accessing this global variable would, on IA32 at least, introduce + * a function call to fetch the Global Offset Table, plus two levels of + * indirection in order to read the value. Even worse, fetching the GOT is + * always done right at the start of the function instead of the branch that + * actually uses the variable. + * + * Fortunately, there's a better way to get the byte count. 
As this table + * shows, there's a nice regular pattern in the UTF-8 encoding scheme: + * + * 0x00000000 - 0x0000007F: 0xxxxxxx + * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx + * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx + * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * + * Except for the single byte case, the number of leading 1-bits equals the + * byte count. All that is needed is to shift the first byte to the left + * until bit 7 becomes 0. Naturally, doing so requires a loop -- but since + * we already have one, no additional cost is introduced. This shifting can + * further be combined with the computation of the bitmask needed to eliminate + * the leading length bits, thus saving yet another register. + * + * Note: If you change this code, it is advisable to also review what the + * compiler makes of it in the assembler output. Except for some pointless + * register moves, the generated code is sufficiently close to the optimum + * with GCC 4.1.2 on x86_64. 
+ */ +gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) +{ + unsigned int result = static_cast<unsigned char>(*pos); + + if((result & 0x80) != 0) + { + unsigned int mask = 0x40; + + do + { + result <<= 6; + const unsigned int c = static_cast<unsigned char>(*++pos); + mask <<= 5; + result += c - 0x80; + } + while((result & mask) != 0); + + result &= mask - 1; + } + + return result; +} + + +/**** Glib::ustring ********************************************************/ + +ustring::ustring() +: + string_ () +{} + +ustring::ustring(const ustring& other) +: + string_ (other.string_) +{} + +ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n) +: + string_ () +{ + const Utf8SubstrBounds bounds (src.string_, i, n); + string_.assign(src.string_, bounds.i, bounds.n); +} + +ustring::ustring(const char* src, ustring::size_type n) +: + string_ (src, utf8_byte_offset(src, n)) +{} + +ustring::ustring(const char* src) +: + string_ (src) +{} + +ustring::ustring(ustring::size_type n, gunichar uc) +: + string_ () +{ + if(uc < 0x80) + { + // Optimize the probably most common case. 
+ string_.assign(n, static_cast<char>(uc)); + } + else + { + const UnicharToUtf8 conv (uc); + string_.reserve(n * conv.len); + + for(; n > 0; --n) + string_.append(conv.buf, conv.len); + } +} + +ustring::ustring(ustring::size_type n, char c) +: + string_ (n, c) +{} + +ustring::ustring(const std::string& src) +: + string_ (src) +{} + +ustring::~ustring() +{} + +void ustring::swap(ustring& other) +{ + string_.swap(other.string_); +} + + +/**** Glib::ustring::operator=() *******************************************/ + +ustring& ustring::operator=(const ustring& other) +{ + string_ = other.string_; + return *this; +} + +ustring& ustring::operator=(const std::string& src) +{ + string_ = src; + return *this; +} + +ustring& ustring::operator=(const char* src) +{ + string_ = src; + return *this; +} + +ustring& ustring::operator=(gunichar uc) +{ + const UnicharToUtf8 conv (uc); + string_.assign(conv.buf, conv.len); + return *this; +} + +ustring& ustring::operator=(char c) +{ + string_ = c; + return *this; +} + + +/**** Glib::ustring::assign() **********************************************/ + +ustring& ustring::assign(const ustring& src) +{ + string_ = src.string_; + return *this; +} + +ustring& ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n) +{ + const Utf8SubstrBounds bounds (src.string_, i, n); + string_.assign(src.string_, bounds.i, bounds.n); + return *this; +} + +ustring& ustring::assign(const char* src, ustring::size_type n) +{ + string_.assign(src, utf8_byte_offset(src, n)); + return *this; +} + +ustring& ustring::assign(const char* src) +{ + string_ = src; + return *this; +} + +ustring& ustring::assign(ustring::size_type n, gunichar uc) +{ + ustring temp (n, uc); + string_.swap(temp.string_); + return *this; +} + +ustring& ustring::assign(ustring::size_type n, char c) +{ + string_.assign(n, c); + return *this; +} + + +/**** Glib::ustring::operator+=() ******************************************/ + +ustring& ustring::operator+=(const 
ustring& src) +{ + string_ += src.string_; + return *this; +} + +ustring& ustring::operator+=(const char* src) +{ + string_ += src; + return *this; +} + +ustring& ustring::operator+=(gunichar uc) +{ + const UnicharToUtf8 conv (uc); + string_.append(conv.buf, conv.len); + return *this; +} + +ustring& ustring::operator+=(char c) +{ + string_ += c; + return *this; +} + + +/**** Glib::ustring::push_back() *******************************************/ + +void ustring::push_back(gunichar uc) +{ + const UnicharToUtf8 conv (uc); + string_.append(conv.buf, conv.len); +} + +void ustring::push_back(char c) +{ + string_ += c; +} + + +/**** Glib::ustring::append() **********************************************/ + +ustring& ustring::append(const ustring& src) +{ + string_ += src.string_; + return *this; +} + +ustring& ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n) +{ + const Utf8SubstrBounds bounds (src.string_, i, n); + string_.append(src.string_, bounds.i, bounds.n); + return *this; +} + +ustring& ustring::append(const char* src, ustring::size_type n) +{ + string_.append(src, utf8_byte_offset(src, n)); + return *this; +} + +ustring& ustring::append(const char* src) +{ + string_ += src; + return *this; +} + +ustring& ustring::append(ustring::size_type n, gunichar uc) +{ + string_.append(ustring(n, uc).string_); + return *this; +} + +ustring& ustring::append(ustring::size_type n, char c) +{ + string_.append(n, c); + return *this; +} + + +/**** Glib::ustring::insert() **********************************************/ + +ustring& ustring::insert(ustring::size_type i, const ustring& src) +{ + string_.insert(utf8_byte_offset(string_, i), src.string_); + return *this; +} + +ustring& ustring::insert(ustring::size_type i, const ustring& src, + ustring::size_type i2, ustring::size_type n) +{ + const Utf8SubstrBounds bounds2 (src.string_, i2, n); + string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n); + return *this; +} + +ustring& 
ustring::insert(ustring::size_type i, const char* src, ustring::size_type n) +{ + string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n)); + return *this; +} + +ustring& ustring::insert(ustring::size_type i, const char* src) +{ + string_.insert(utf8_byte_offset(string_, i), src); + return *this; +} + +ustring& ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc) +{ + string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_); + return *this; +} + +ustring& ustring::insert(ustring::size_type i, ustring::size_type n, char c) +{ + string_.insert(utf8_byte_offset(string_, i), n, c); + return *this; +} + +ustring::iterator ustring::insert(ustring::iterator p, gunichar uc) +{ + const size_type offset = p.base() - string_.begin(); + const UnicharToUtf8 conv (uc); + string_.insert(offset, conv.buf, conv.len); + return iterator(string_.begin() + offset); +} + +ustring::iterator ustring::insert(ustring::iterator p, char c) +{ + return iterator(string_.insert(p.base(), c)); +} + +void ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc) +{ + string_.insert(p.base() - string_.begin(), ustring(n, uc).string_); +} + +void ustring::insert(ustring::iterator p, ustring::size_type n, char c) +{ + string_.insert(p.base(), n, c); +} + + +/**** Glib::ustring::replace() *********************************************/ + +ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.replace(bounds.i, bounds.n, src.string_); + return *this; +} + +ustring& ustring::replace(ustring::size_type i, ustring::size_type n, + const ustring& src, ustring::size_type i2, ustring::size_type n2) +{ + const Utf8SubstrBounds bounds (string_, i, n); + const Utf8SubstrBounds bounds2 (src.string_, i2, n2); + string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n); + return *this; +} + +ustring& ustring::replace(ustring::size_type i, 
ustring::size_type n, + const char* src, ustring::size_type n2) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2)); + return *this; +} + +ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const char* src) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.replace(bounds.i, bounds.n, src); + return *this; +} + +ustring& ustring::replace(ustring::size_type i, ustring::size_type n, + ustring::size_type n2, gunichar uc) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_); + return *this; +} + +ustring& ustring::replace(ustring::size_type i, ustring::size_type n, + ustring::size_type n2, char c) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.replace(bounds.i, bounds.n, n2, c); + return *this; +} + +ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src) +{ + string_.replace(pbegin.base(), pend.base(), src.string_); + return *this; +} + +ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, + const char* src, ustring::size_type n) +{ + string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n)); + return *this; +} + +ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src) +{ + string_.replace(pbegin.base(), pend.base(), src); + return *this; +} + +ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, + ustring::size_type n, gunichar uc) +{ + string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_); + return *this; +} + +ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, + ustring::size_type n, char c) +{ + string_.replace(pbegin.base(), pend.base(), n, c); + return *this; +} + + +/**** Glib::ustring::erase() ***********************************************/ + +void ustring::clear() +{ + string_.erase(); +} + +ustring& 
ustring::erase(ustring::size_type i, ustring::size_type n) +{ + const Utf8SubstrBounds bounds (string_, i, n); + string_.erase(bounds.i, bounds.n); + return *this; +} + +ustring& ustring::erase() +{ + string_.erase(); + return *this; +} + +ustring::iterator ustring::erase(ustring::iterator p) +{ + ustring::iterator iter_end = p; + ++iter_end; + + return iterator(string_.erase(p.base(), iter_end.base())); +} + +ustring::iterator ustring::erase(ustring::iterator pbegin, ustring::iterator pend) +{ + return iterator(string_.erase(pbegin.base(), pend.base())); +} + + +/**** Glib::ustring::compare() *********************************************/ + +int ustring::compare(const ustring& rhs) const +{ + return g_utf8_collate(string_.c_str(), rhs.string_.c_str()); +} + +int ustring::compare(const char* rhs) const +{ + return g_utf8_collate(string_.c_str(), rhs); +} + +int ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const +{ + return ustring(*this, i, n).compare(rhs); +} + +int ustring::compare(ustring::size_type i, ustring::size_type n, + const ustring& rhs, ustring::size_type i2, ustring::size_type n2) const +{ + return ustring(*this, i, n).compare(ustring(rhs, i2, n2)); +} + +int ustring::compare(ustring::size_type i, ustring::size_type n, + const char* rhs, ustring::size_type n2) const +{ + return ustring(*this, i, n).compare(ustring(rhs, n2)); +} + +int ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const +{ + return ustring(*this, i, n).compare(rhs); +} + + +/**** Glib::ustring -- index access ****************************************/ + +ustring::value_type ustring::operator[](ustring::size_type i) const +{ + return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i)); +} + +ustring::value_type ustring::at(ustring::size_type i) const +{ + const size_type byte_offset = utf8_byte_offset(string_, i); + + // Throws std::out_of_range if the index is invalid. 
+ return g_utf8_get_char(&string_.at(byte_offset)); +} + + +/**** Glib::ustring -- iterator access *************************************/ + +ustring::iterator ustring::begin() +{ + return iterator(string_.begin()); +} + +ustring::iterator ustring::end() +{ + return iterator(string_.end()); +} + +ustring::const_iterator ustring::begin() const +{ + return const_iterator(string_.begin()); +} + +ustring::const_iterator ustring::end() const +{ + return const_iterator(string_.end()); +} + +ustring::reverse_iterator ustring::rbegin() +{ + return reverse_iterator(iterator(string_.end())); +} + +ustring::reverse_iterator ustring::rend() +{ + return reverse_iterator(iterator(string_.begin())); +} + +ustring::const_reverse_iterator ustring::rbegin() const +{ + return const_reverse_iterator(const_iterator(string_.end())); +} + +ustring::const_reverse_iterator ustring::rend() const +{ + return const_reverse_iterator(const_iterator(string_.begin())); +} + + +/**** Glib::ustring::find() ************************************************/ + +ustring::size_type ustring::find(const ustring& str, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i))); +} + +ustring::size_type ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const +{ + return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i), + utf8_byte_offset(str, n))); +} + +ustring::size_type ustring::find(const char* str, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i))); +} + +ustring::size_type ustring::find(gunichar uc, ustring::size_type i) const +{ + const UnicharToUtf8 conv (uc); + return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len)); +} + +ustring::size_type ustring::find(char c, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i))); +} + + 
+/**** Glib::ustring::rfind() ***********************************************/ + +ustring::size_type ustring::rfind(const ustring& str, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i))); +} + +ustring::size_type ustring::rfind(const char* str, ustring::size_type i, + ustring::size_type n) const +{ + return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i), + utf8_byte_offset(str, n))); +} + +ustring::size_type ustring::rfind(const char* str, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i))); +} + +ustring::size_type ustring::rfind(gunichar uc, ustring::size_type i) const +{ + const UnicharToUtf8 conv (uc); + return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len)); +} + +ustring::size_type ustring::rfind(char c, ustring::size_type i) const +{ + return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i))); +} + + +/**** Glib::ustring::find_first_of() ***************************************/ + +ustring::size_type ustring::find_first_of(const ustring& match, ustring::size_type i) const +{ + return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false); +} + +ustring::size_type ustring::find_first_of(const char* match, + ustring::size_type i, ustring::size_type n) const +{ + return utf8_find_first_of(string_, i, match, n, false); +} + +ustring::size_type ustring::find_first_of(const char* match, ustring::size_type i) const +{ + return utf8_find_first_of(string_, i, match, -1, false); +} + +ustring::size_type ustring::find_first_of(gunichar uc, ustring::size_type i) const +{ + return find(uc, i); +} + +ustring::size_type ustring::find_first_of(char c, ustring::size_type i) const +{ + return find(c, i); +} + + +/**** Glib::ustring::find_last_of() ****************************************/ + +ustring::size_type 
ustring::find_last_of(const ustring& match, ustring::size_type i) const +{ + return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false); +} + +ustring::size_type ustring::find_last_of(const char* match, + ustring::size_type i, ustring::size_type n) const +{ + return utf8_find_last_of(string_, i, match, n, false); +} + +ustring::size_type ustring::find_last_of(const char* match, ustring::size_type i) const +{ + return utf8_find_last_of(string_, i, match, -1, false); +} + +ustring::size_type ustring::find_last_of(gunichar uc, ustring::size_type i) const +{ + return rfind(uc, i); +} + +ustring::size_type ustring::find_last_of(char c, ustring::size_type i) const +{ + return rfind(c, i); +} + + +/**** Glib::ustring::find_first_not_of() ***********************************/ + +ustring::size_type ustring::find_first_not_of(const ustring& match, ustring::size_type i) const +{ + return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true); +} + +ustring::size_type ustring::find_first_not_of(const char* match, + ustring::size_type i, ustring::size_type n) const +{ + return utf8_find_first_of(string_, i, match, n, true); +} + +ustring::size_type ustring::find_first_not_of(const char* match, ustring::size_type i) const +{ + return utf8_find_first_of(string_, i, match, -1, true); +} + +// Unfortunately, all of the find_*_not_of() methods for single +// characters need their own special implementation. 
+// +ustring::size_type ustring::find_first_not_of(gunichar uc, ustring::size_type i) const +{ + const size_type bi = utf8_byte_offset(string_, i); + if(bi != npos) + { + const char *const pbegin = string_.data(); + const char *const pend = pbegin + string_.size(); + + for(const char* p = pbegin + bi; + p < pend; + p = g_utf8_next_char(p), ++i) + { + if(g_utf8_get_char(p) != uc) + return i; + } + } + return npos; +} + +ustring::size_type ustring::find_first_not_of(char c, ustring::size_type i) const +{ + const size_type bi = utf8_byte_offset(string_, i); + if(bi != npos) + { + const char *const pbegin = string_.data(); + const char *const pend = pbegin + string_.size(); + + for(const char* p = pbegin + bi; + p < pend; + p = g_utf8_next_char(p), ++i) + { + if(*p != c) + return i; + } + } + return npos; +} + + +/**** Glib::ustring::find_last_not_of() ************************************/ + +ustring::size_type ustring::find_last_not_of(const ustring& match, ustring::size_type i) const +{ + return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true); +} + +ustring::size_type ustring::find_last_not_of(const char* match, + ustring::size_type i, ustring::size_type n) const +{ + return utf8_find_last_of(string_, i, match, n, true); +} + +ustring::size_type ustring::find_last_not_of(const char* match, ustring::size_type i) const +{ + return utf8_find_last_of(string_, i, match, -1, true); +} + +// Unfortunately, all of the find_*_not_of() methods for single +// characters need their own special implementation. 
+// +ustring::size_type ustring::find_last_not_of(gunichar uc, ustring::size_type i) const +{ + const char *const pbegin = string_.data(); + const char *const pend = pbegin + string_.size(); + size_type i_cur = 0; + size_type i_found = npos; + + for(const char* p = pbegin; + p < pend && i_cur <= i; + p = g_utf8_next_char(p), ++i_cur) + { + if(g_utf8_get_char(p) != uc) + i_found = i_cur; + } + return i_found; +} + +ustring::size_type ustring::find_last_not_of(char c, ustring::size_type i) const +{ + const char *const pbegin = string_.data(); + const char *const pend = pbegin + string_.size(); + size_type i_cur = 0; + size_type i_found = npos; + + for(const char* p = pbegin; + p < pend && i_cur <= i; + p = g_utf8_next_char(p), ++i_cur) + { + if(*p != c) + i_found = i_cur; + } + return i_found; +} + + +/**** Glib::ustring -- get size and resize *********************************/ + +bool ustring::empty() const +{ + return string_.empty(); +} + +ustring::size_type ustring::size() const +{ + const char *const pdata = string_.data(); + return g_utf8_pointer_to_offset(pdata, pdata + string_.size()); +} + +ustring::size_type ustring::length() const +{ + const char *const pdata = string_.data(); + return g_utf8_pointer_to_offset(pdata, pdata + string_.size()); +} + +ustring::size_type ustring::bytes() const +{ + return string_.size(); +} + +ustring::size_type ustring::capacity() const +{ + return string_.capacity(); +} + +ustring::size_type ustring::max_size() const +{ + return string_.max_size(); +} + +void ustring::resize(ustring::size_type n, gunichar uc) +{ + const size_type size_now = size(); + if(n < size_now) + erase(n, npos); + else if(n > size_now) + append(n - size_now, uc); +} + +void ustring::resize(ustring::size_type n, char c) +{ + const size_type size_now = size(); + if(n < size_now) + erase(n, npos); + else if(n > size_now) + string_.append(n - size_now, c); +} + +void ustring::reserve(ustring::size_type n) +{ + string_.reserve(n); +} + + +/**** Glib::ustring 
-- C string access *************************************/ + +const char* ustring::data() const +{ + return string_.data(); +} + +const char* ustring::c_str() const +{ + return string_.c_str(); +} + +// Note that copy() requests UTF-8 character offsets as +// parameters, but returns the number of copied bytes. +// +ustring::size_type ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const +{ + const Utf8SubstrBounds bounds (string_, i, n); + return string_.copy(dest, bounds.n, bounds.i); +} + + +/**** Glib::ustring -- UTF-8 utilities *************************************/ + +bool ustring::validate() const +{ + return (g_utf8_validate(string_.data(), string_.size(), 0) != 0); +} + +bool ustring::validate(ustring::iterator& first_invalid) +{ + const char *const pdata = string_.data(); + const char* valid_end = pdata; + const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end); + + first_invalid = iterator(string_.begin() + (valid_end - pdata)); + return (is_valid != 0); +} + +bool ustring::validate(ustring::const_iterator& first_invalid) const +{ + const char *const pdata = string_.data(); + const char* valid_end = pdata; + const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end); + + first_invalid = const_iterator(string_.begin() + (valid_end - pdata)); + return (is_valid != 0); +} + +bool ustring::is_ascii() const +{ + const char* p = string_.data(); + const char *const pend = p + string_.size(); + + for(; p != pend; ++p) + { + if((static_cast<unsigned char>(*p) & 0x80u) != 0) + return false; + } + + return true; +} + +ustring ustring::normalize(NormalizeMode mode) const +{ + const ScopedPtr<char> buf (g_utf8_normalize(string_.data(), string_.size(), + static_cast<GNormalizeMode>(int(mode)))); + return ustring(buf.get()); +} + +ustring ustring::uppercase() const +{ + const ScopedPtr<char> buf (g_utf8_strup(string_.data(), string_.size())); + return ustring(buf.get()); +} + +ustring ustring::lowercase() const +{ + 
const ScopedPtr<char> buf (g_utf8_strdown(string_.data(), string_.size())); + return ustring(buf.get()); +} + +ustring ustring::casefold() const +{ + const ScopedPtr<char> buf (g_utf8_casefold(string_.data(), string_.size())); + return ustring(buf.get()); +} + +std::string ustring::collate_key() const +{ + const ScopedPtr<char> buf (g_utf8_collate_key(string_.data(), string_.size())); + return std::string(buf.get()); +} + +std::string ustring::casefold_collate_key() const +{ + char *const casefold_buf = g_utf8_casefold(string_.data(), string_.size()); + char *const key_buf = g_utf8_collate_key(casefold_buf, -1); + g_free(casefold_buf); + return std::string(ScopedPtr<char>(key_buf).get()); +} + +/**** Glib::ustring -- Message formatting **********************************/ + +// static +ustring ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv) +{ + std::string::size_type result_size = fmt.raw().size(); + + // Guesstimate the final string size. + for (int i = 0; i < argc; ++i) + result_size += argv[i]->raw().size(); + + std::string result; + result.reserve(result_size); + + const char* const pfmt = fmt.raw().c_str(); + const char* start = pfmt; + + while (const char* const stop = std::strchr(start, '%')) + { + if (stop[1] == '%') + { + result.append(start, stop - start + 1); + start = stop + 2; + } + else + { + const int index = Ascii::digit_value(stop[1]) - 1; + + if (index >= 0 && index < argc) + { + result.append(start, stop - start); + result += argv[index]->raw(); + start = stop + 2; + } + else + { + const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1); + + // Copy invalid substitutions literally to the output. 
+ result.append(start, next - start); + + g_warning("invalid substitution \"%s\" in fmt string \"%s\"", + result.c_str() + result.size() - (next - stop), pfmt); + start = next; + } + } + } + + result.append(start, pfmt + fmt.raw().size() - start); + + return result; +} + +/**** Glib::ustring::SequenceToString **************************************/ + +ustring::SequenceToString<Glib::ustring::iterator,gunichar> + ::SequenceToString(Glib::ustring::iterator pbegin, Glib::ustring::iterator pend) +: + std::string(pbegin.base(), pend.base()) +{} + +ustring::SequenceToString<Glib::ustring::const_iterator,gunichar> + ::SequenceToString(Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend) +: + std::string(pbegin.base(), pend.base()) +{} + +/**** Glib::ustring::FormatStream ******************************************/ + +ustring::FormatStream::FormatStream() +: + stream_ () +{} + +ustring::FormatStream::~FormatStream() +{} + +ustring ustring::FormatStream::to_string() const +{ + GError* error = 0; + +#ifdef GLIBMM_HAVE_WIDE_STREAM + const std::wstring str = stream_.str(); + +# if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 + // Avoid going through iconv if wchar_t always contains UCS-4. + glong n_bytes = 0; + const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(str.data()), + str.size(), 0, &n_bytes, &error)); +# elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 + // Avoid going through iconv if wchar_t always contains UTF-16. 
+ glong n_bytes = 0; + const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(str.data()), + str.size(), 0, &n_bytes, &error)); +# else + gsize n_bytes = 0; + const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(str.data()), + str.size() * sizeof(std::wstring::value_type), + "UTF-8", "WCHAR_T", 0, &n_bytes, &error)); +# endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */ + +#else /* !GLIBMM_HAVE_WIDE_STREAM */ + const std::string str = stream_.str(); + + gsize n_bytes = 0; + const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error)); +#endif /* !GLIBMM_HAVE_WIDE_STREAM */ + + if (error) + { + Glib::Error::throw_exception(error); + } + + return ustring(buf.get(), buf.get() + n_bytes); +} + +/**** Glib::ustring -- stream I/O operators ********************************/ + +std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string) +{ + std::string str; + is >> str; + + GError* error = 0; + gsize n_bytes = 0; + const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error)); + + if (error) + { + Glib::Error::throw_exception(error); + } + + utf8_string.assign(buf.get(), buf.get() + n_bytes); + + return is; +} + +std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string) +{ + GError* error = 0; + const ScopedPtr<char> buf (g_locale_from_utf8(utf8_string.raw().data(), + utf8_string.raw().size(), 0, 0, &error)); + if (error) + { + Glib::Error::throw_exception(error); + } + + // This won't work if the string contains NUL characters. Unfortunately, + // std::ostream::write() ignores format flags, so we cannot use that. + // The only option would be to create a temporary std::string. However, + // even then GCC's libstdc++-v3 prints only the characters up to the first + // NUL. Given this, there doesn't seem much of a point in allowing NUL in + // formatted output. The semantics would be unclear anyway: what's the + // screen width of a NUL? 
+ os << buf.get(); + + return os; +} + +#ifdef GLIBMM_HAVE_WIDE_STREAM + +std::wistream& operator>>(std::wistream& is, ustring& utf8_string) +{ + GError* error = 0; + + std::wstring wstr; + is >> wstr; + +#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 + // Avoid going through iconv if wchar_t always contains UCS-4. + glong n_bytes = 0; + const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(wstr.data()), + wstr.size(), 0, &n_bytes, &error)); +#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 + // Avoid going through iconv if wchar_t always contains UTF-16. + glong n_bytes = 0; + const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(wstr.data()), + wstr.size(), 0, &n_bytes, &error)); +#else + gsize n_bytes = 0; + const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(wstr.data()), + wstr.size() * sizeof(std::wstring::value_type), + "UTF-8", "WCHAR_T", 0, &n_bytes, &error)); +#endif // !(__STDC_ISO_10646__ || G_OS_WIN32) + + if (error) + { + Glib::Error::throw_exception(error); + } + + utf8_string.assign(buf.get(), buf.get() + n_bytes); + + return is; +} + +std::wostream& operator<<(std::wostream& os, const ustring& utf8_string) +{ + GError* error = 0; + +#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 + // Avoid going through iconv if wchar_t always contains UCS-4. + const ScopedPtr<gunichar> buf (g_utf8_to_ucs4(utf8_string.raw().data(), + utf8_string.raw().size(), 0, 0, &error)); +#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 + // Avoid going through iconv if wchar_t always contains UTF-16. + const ScopedPtr<gunichar2> buf (g_utf8_to_utf16(utf8_string.raw().data(), + utf8_string.raw().size(), 0, 0, &error)); +#else + // TODO: For some reason the conversion from UTF-8 to WCHAR_T doesn't work + // with g_convert(), while iconv on the command line handles it just fine. + // Maybe a bug in GLib? 
+ const ScopedPtr<char> buf (g_convert(utf8_string.raw().data(), utf8_string.raw().size(), + "WCHAR_T", "UTF-8", 0, 0, &error)); +#endif // !(__STDC_ISO_10646__ || G_OS_WIN32) + + if (error) + { + Glib::Error::throw_exception(error); + } + + // This won't work if the string contains NUL characters. Unfortunately, + // std::wostream::write() ignores format flags, so we cannot use that. + // The only option would be to create a temporary std::wstring. However, + // even then GCC's libstdc++-v3 prints only the characters up to the first + // NUL. Given this, there doesn't seem much of a point in allowing NUL in + // formatted output. The semantics would be unclear anyway: what's the + // screen width of a NUL? + os << reinterpret_cast<wchar_t*>(buf.get()); + + return os; +} + +#endif /* GLIBMM_HAVE_WIDE_STREAM */ + +} // namespace Glib Property changes on: trunk/src/libustring/ustring.cc ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:eol-style + native Added: trunk/src/libustring/ustring.h =================================================================== --- trunk/src/libustring/ustring.h (rev 0) +++ trunk/src/libustring/ustring.h 2012-03-20 13:35:13 UTC (rev 200) @@ -0,0 +1,1615 @@ +// -*- c++ -*- +#ifndef _GLIBMM_USTRING_H +#define _GLIBMM_USTRING_H + +/* $Id$ */ + +/* Copyright (C) 2002 The gtkmm Development Team + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <glibmmconfig.h> +#include <glibmm/unicode.h> +#include <glib.h> + +#include <iosfwd> +#include <iterator> +#include <sstream> +#include <string> +#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS +#include <cstddef> /* for ptrdiff_t */ +#endif + +namespace Glib +{ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS + +template <class T> +struct IteratorTraits +{ + typedef typename T::iterator_category iterator_category; + typedef typename T::value_type value_type; + typedef typename T::difference_type difference_type; + typedef typename T::pointer pointer; + typedef typename T::reference reference; +}; + +template <class T> +struct IteratorTraits<T*> +{ + typedef std::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef T& reference; +}; + +template <class T> +struct IteratorTraits<const T*> +{ + typedef std::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef const T* pointer; + typedef const T& reference; +}; + +#endif /* GLIBMM_HAVE_STD_ITERATOR_TRAITS */ +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + + +/** The iterator type of Glib::ustring. + * Note this is not a random access iterator but a bidirectional one, + * since all index operations need to iterate over the UTF-8 data. Use + * std::advance() to move to a certain position. However, all of the + * relational operators are available: + * <tt>== != < > <= >=</tt> + * + * A writeable iterator isn't provided because: The number of bytes of + * the old UTF-8 character and the new one to write could be different. + * Therefore, any write operation would invalidate all other iterators + * pointing into the same string. 
+ */ +template <class T> +class ustring_Iterator +{ +public: + typedef std::bidirectional_iterator_tag iterator_category; + typedef gunichar value_type; + typedef std::string::difference_type difference_type; + typedef value_type reference; + typedef void pointer; + + inline ustring_Iterator(); + inline ustring_Iterator(const ustring_Iterator<std::string::iterator>& other); + + inline value_type operator*() const; + + inline ustring_Iterator<T> & operator++(); + inline const ustring_Iterator<T> operator++(int); + inline ustring_Iterator<T> & operator--(); + inline const ustring_Iterator<T> operator--(int); + + explicit inline ustring_Iterator(T pos); + inline T base() const; + +private: + T pos_; +}; + + +/** Extract a UCS-4 character from UTF-8 data. + * Convert a single UTF-8 (multibyte) character starting at @p pos to + * a UCS-4 wide character. This may read up to 6 bytes after the start + * position, depending on the UTF-8 character width. You have to make + * sure the source contains at least one valid UTF-8 character. + * + * This is mainly used by the implementation of Glib::ustring::iterator, + * but it might be useful as utility function if you prefer using + * std::string even for UTF-8 encoding. + */ +gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) G_GNUC_PURE; + + +/** Glib::ustring has much the same interface as std::string, but contains + * %Unicode characters encoded as UTF-8. + * + * @par About UTF-8 and ASCII + * @par + * The standard character set ANSI_X3.4-1968 -- more commonly known as + * ASCII -- is a subset of UTF-8. So, if you want to, you can use + * Glib::ustring without even thinking about UTF-8. + * @par + * Whenever ASCII is mentioned in this manual, we mean the @em real ASCII + * (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. + * Glib::ustring can @em not be used with ASCII-compatible extended 8-bit + * charsets like ISO-8859-1. 
It's a good idea to avoid string literals + * containing non-ASCII characters (e.g. German umlauts) in source code, + * or at least you should use UTF-8 literals. + * @par + * You can find a detailed UTF-8 and %Unicode FAQ here: + * http://www.cl.cam.ac.uk/~mgk25/unicode.html + * + * @par Glib::ustring vs. std::string + * @par + * Glib::ustring has implicit type conversions to and from std::string. + * These conversions do @em not convert to/from the current locale (see + * Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You + * can always use std::string instead of Glib::ustring -- however, using + * std::string with multi-byte characters is quite hard. For instance, + * <tt>std::string::operator[]</tt> might return a byte in the middle of a + * character, and <tt>std::string::length()</tt> returns the number of bytes + * rather than characters. So don't do that without a good reason. + * @par + * In a perfect world the C++ Standard Library would contain a UTF-8 string + * class. Unfortunately, the C++ standard doesn't mention UTF-8 at all. Note + * that std::wstring is not a UTF-8 string class because it contains only + * fixed-width characters (where width could be 32, 16, or even 8 bits). + * + * @par Glib::ustring and stream input/output + * @par + * The stream I/O operators, that is operator<<() and operator>>(), perform + * implicit charset conversion to/from the current locale. If that's not + * what you intended (e.g. when writing to a configuration file that should + * always be UTF-8 encoded) use ustring::raw() to override this behaviour. 
+ * @par + * If you're using std::ostringstream to build strings for display in the + * user interface, you must convert the result back to UTF-8 as shown below: + * @code + * std::ostringstream output; + * output.imbue(std::locale("")); // use the user's locale for this stream + * output << percentage << " % done"; + * label->set_text(Glib::locale_to_utf8(output.str())); + * @endcode + * + * @par Formatted output and internationalization + * @par + * The methods ustring::compose() and ustring::format() provide a convenient + * and powerful alternative to string streams, as shown in the example below. + * Refer to the method documentation of compose() and format() for details. + * @code + * using Glib::ustring; + * + * ustring message = ustring::compose("%1 is lower than 0x%2.", + * 12, ustring::format(std::hex, 16)); + * @endcode + * + * @par Implementation notes + * @par + * Glib::ustring does not inherit from std::string, because std::string was + * intended to be a final class. For instance, it does not have a virtual + * destructor. Also, a HAS-A relationship is more appropriate because + * ustring can't just enhance the std::string interface. Rather, it has to + * reimplement the interface so that all operations are based on characters + * instead of bytes. 
+ */ +class ustring +{ +public: + typedef std::string::size_type size_type; + typedef std::string::difference_type difference_type; + + typedef gunichar value_type; + typedef gunichar & reference; + typedef const gunichar & const_reference; + + typedef ustring_Iterator<std::string::iterator> iterator; + typedef ustring_Iterator<std::string::const_iterator> const_iterator; + +#ifndef GLIBMM_HAVE_SUN_REVERSE_ITERATOR + + typedef std::reverse_iterator<iterator> reverse_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + +#else + + typedef std::reverse_iterator<iterator, + iterator::iterator_category, + iterator::value_type, + iterator::reference, + iterator::pointer, + iterator::difference_type> reverse_iterator; + typedef std::reverse_iterator<const_iterator, + const_iterator::iterator_category, + const_iterator::value_type, + const_iterator::reference, + const_iterator::pointer, + const_iterator::difference_type> const_reverse_iterator; + +#endif /* GLIBMM_HAVE_SUN_REVERSE_ITERATOR */ + +#ifdef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS + static GLIBMM_API const size_type npos = std::string::npos; +#else + //The IRIX MipsPro compiler says "The indicated constant value is not known", + //so we need to initialize the static member data elsewhere. + static GLIBMM_API const size_type npos; +#endif + + /*! Default constructor, which creates an empty string. + */ + ustring(); + + ~ustring(); + + /*! Construct a ustring as a copy of another ustring. + * @param other A source string. + */ + ustring(const ustring& other); + + /*! Assign the value of another string to this string. + * @param other A source string. + */ + ustring& operator=(const ustring& other); + + /*! Swap contents with another string. + * @param other String to swap with. + */ + void swap(ustring& other); + + /*! Construct a ustring as a copy of another std::string. + * @param src A source <tt>std::string</tt> containing text encoded as UTF-8. 
+ */ + ustring(const std::string& src); + + /*! Construct a ustring as a copy of a substring. + * @param src %Source ustring. + * @param i Index of first character to copy from. + * @param n Number of UTF-8 characters to copy (defaults to copying the remainder). + */ + ustring(const ustring& src, size_type i, size_type n=npos); + + /*! Construct a ustring as a partial copy of a C string. + * @param src %Source C string encoded as UTF-8. + * @param n Number of UTF-8 characters to copy. + */ + ustring(const char* src, size_type n); + + /*! Construct a ustring as a copy of a C string. + * @param src %Source C string encoded as UTF-8. + */ + ustring(const char* src); + + /*! Construct a ustring as multiple characters. + * @param n Number of characters. + * @param uc UCS-4 code point to use. + */ + ustring(size_type n, gunichar uc); + + /*! Construct a ustring as multiple characters. + * @param n Number of characters. + * @param c ASCII character to use. + */ + ustring(size_type n, char c); + + /*! Construct a ustring as a copy of a range. + * @param pbegin Start of range. + * @param pend End of range. + */ + template <class In> ustring(In pbegin, In pend); + + +//! @name Assign new contents. +//! @{ + + ustring& operator=(const std::string& src); + ustring& operator=(const char* src); + ustring& operator=(gunichar uc); + ustring& operator=(char c); + + ustring& assign(const ustring& src); + ustring& assign(const ustring& src, size_type i, size_type n); + ustring& assign(const char* src, size_type n); + ustring& assign(const char* src); + ustring& assign(size_type n, gunichar uc); + ustring& assign(size_type n, char c); + template <class In> ustring& assign(In pbegin, In pend); + +//! @} +//! @name Append to the string. +//! 
@{ + + ustring& operator+=(const ustring& src); + ustring& operator+=(const char* src); + ustring& operator+=(gunichar uc); + ustring& operator+=(char c); + void push_back(gunichar uc); + void push_back(char c); + + ustring& append(const ustring& src); + ustring& append(const ustring& src, size_type i, size_type n); + ustring& append(const char* src, size_type n); + ustring& append(const char* src); + ustring& append(size_type n, gunichar uc); + ustring& append(size_type n, char c); + template <class In> ustring& append(In pbegin, In pend); + +//! @} +//! @name Insert into the string. +//! @{ + + ustring& insert(size_type i, const ustring& src); + ustring& insert(size_type i, const ustring& src, size_type i2, size_type n); + ustring& insert(size_type i, const char* src, size_type n); + ustring& insert(size_type i, const char* src); + ustring& insert(size_type i, size_type n, gunichar uc); + ustring& insert(size_type i, size_type n, char c); + + iterator insert(iterator p, gunichar uc); + iterator insert(iterator p, char c); + void insert(iterator p, size_type n, gunichar uc); + void insert(iterator p, size_type n, char c); + template <class In> void insert(iterator p, In pbegin, In pend); + +//! @} +//! @name Replace sub-strings. +//! 
@{ + + ustring& replace(size_type i, size_type n, const ustring& src); + ustring& replace(size_type i, size_type n, const ustring& src, size_type i2, size_type n2); + ustring& replace(size_type i, size_type n, const char* src, size_type n2); + ustring& replace(size_type i, size_type n, const char* src); + ustring& replace(size_type i, size_type n, size_type n2, gunichar uc); + ustring& replace(size_type i, size_type n, size_type n2, char c); + + ustring& replace(iterator pbegin, iterator pend, const ustring& src); + ustring& replace(iterator pbegin, iterator pend, const char* src, size_type n); + ustring& replace(iterator pbegin, iterator pend, const char* src); + ustring& replace(iterator pbegin, iterator pend, size_type n, gunichar uc); + ustring& replace(iterator pbegin, iterator pend, size_type n, char c); + template <class In> ustring& replace(iterator pbegin, iterator pend, In pbegin2, In pend2); + +//! @} +//! @name Erase sub-strings. +//! @{ + + void clear(); + ustring& erase(size_type i, size_type n=npos); + ustring& erase(); + iterator erase(iterator p); + iterator erase(iterator pbegin, iterator pend); + +//! @} +//! @name Compare and collate. +//! @{ + + int compare(const ustring& rhs) const; + int compare(const char* rhs) const; + int compare(size_type i, size_type n, const ustring& rhs) const; + int compare(size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const; + int compare(size_type i, size_type n, const char* rhs, size_type n2) const; + int compare(size_type i, size_type n, const char* rhs) const; + + /*! Create a unique sorting key for the UTF-8 string. If you need to + * compare UTF-8 strings regularly, e.g. for sorted containers such as + * <tt>std::set<></tt>, you should consider creating a collate key first + * and compare this key instead of the actual string. 
+ * + * The ustring::compare() methods as well as the relational operators + * <tt>== != < > <= >=</tt> are quite costly + * because they have to deal with %Unicode and the collation rules defined by + * the current locale. Converting both operands to UCS-4 is just the first + * of several costly steps involved when comparing ustrings. So be careful. + */ + std::string collate_key() const; + + /*! Create a unique key for the UTF-8 string that can be used for caseless + * sorting. <tt>ustr.casefold_collate_key()</tt> results in the same string + * as <tt>ustr.casefold().collate_key()</tt>, but the former is likely more + * efficient. + */ + std::string casefold_collate_key() const; + +//! @} +//! @name Extract characters and sub-strings. +//! @{ + + /*! No reference return; use replace() to write characters. */ + value_type operator[](size_type i) const; + + /*! No reference return; use replace() to write characters. @throw std::out_of_range */ + value_type at(size_type i) const; + + inline ustring substr(size_type i=0, size_type n=npos) const; + +//! @} +//! @name Access a sequence of characters. +//! @{ + + iterator begin(); + iterator end(); + const_iterator begin() const; + const_iterator end() const; + reverse_iterator rbegin(); + reverse_iterator rend(); + const_reverse_iterator rbegin() const; + const_reverse_iterator rend() const; + +//! @} +//! @name Find sub-strings. +//! @{ + + size_type find(const ustring& str, size_type i=0) const; + size_type find(const char* str, size_type i, size_type n) const; + size_type find(const char* str, size_type i=0) const; + size_type find(gunichar uc, size_type i=0) const; + size_type find(char c, size_type i=0) const; + + size_type rfind(const ustring& str, size_type i=npos) const; + size_type rfind(const char* str, size_type i, size_type n) const; + size_type rfind(const char* str, size_type i=npos) const; + size_type rfind(gunichar uc, size_type i=npos) const; + size_type rfind(char c, size_type i=npos) const; + +//! 
@} +//! @name Match against a set of characters. +//! @{ + + size_type find_first_of(const ustring& match, size_type i=0) const; + size_type find_first_of(const char* match, size_type i, size_type n) const; + size_type find_first_of(const char* match, size_type i=0) const; + size_type find_first_of(gunichar uc, size_type i=0) const; + size_type find_first_of(char c, size_type i=0) const; + + size_type find_last_of(const ustri... [truncated message content] |
From: <tbr...@us...> - 2012-03-20 14:10:42
|
Revision: 205 http://xmlwrapp.svn.sourceforge.net/xmlwrapp/?rev=205&view=rev Author: tbrowder2 Date: 2012-03-20 14:10:29 +0000 (Tue, 20 Mar 2012) Log Message: ----------- rename for clarity Added Paths: ----------- trunk/src/libxmlwrappstring/ trunk/src/libxmlwrappstring/xmlwrappstring.cc trunk/src/libxmlwrappstring/xmlwrappstring.h Removed Paths: ------------- trunk/src/libustring/xmlwrappstring.cc trunk/src/libustring/xmlwrappstring.h trunk/src/libxmlwrappstring/ustring.cc trunk/src/libxmlwrappstring/ustring.h Deleted: trunk/src/libustring/xmlwrappstring.cc =================================================================== --- trunk/src/libustring/xmlwrappstring.cc 2012-03-20 14:05:34 UTC (rev 204) +++ trunk/src/libustring/xmlwrappstring.cc 2012-03-20 14:10:29 UTC (rev 205) @@ -1,1418 +0,0 @@ -// -*- c++ -*- -/* $Id$ */ - -/* Copyright (C) 2002 The gtkmm Development Team - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <glibmmconfig.h> -#include <glibmm/ustring.h> -#include <glibmm/convert.h> -#include <glibmm/error.h> -#include <glibmm/utility.h> - -#include <algorithm> -#include <iostream> -#include <cstring> -# include <stdexcept> -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -namespace -{ - -using Glib::ustring; - -// Little helper to make the conversion from gunichar to UTF-8 a one-liner. -// -struct UnicharToUtf8 -{ - char buf[6]; - ustring::size_type len; - - explicit UnicharToUtf8(gunichar uc) - : len (g_unichar_to_utf8(uc, buf)) {} -}; - - -// All utf8_*_offset() functions return npos if offset is out of range. -// The caller should decide if npos is a valid argument and just marks -// the whole string, or if it is not allowed (e.g. for start positions). -// In the latter case std::out_of_range should be thrown, but usually -// std::string will do that for us. - -// First overload: stop on '\0' character. -static -ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset) -{ - if(offset == ustring::npos) - return ustring::npos; - - const char *const utf8_skip = g_utf8_skip; - const char* p = str; - - for(; offset != 0; --offset) - { - const unsigned int c = static_cast<unsigned char>(*p); - - if(c == 0) - return ustring::npos; - - p += utf8_skip[c]; - } - - return (p - str); -} - -// Second overload: stop when reaching maxlen. -static -ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset, - ustring::size_type maxlen) -{ - if(offset == ustring::npos) - return ustring::npos; - - const char *const utf8_skip = g_utf8_skip; - const char *const pend = str + maxlen; - const char* p = str; - - for(; offset != 0; --offset) - { - if(p >= pend) - return ustring::npos; - - p += utf8_skip[static_cast<unsigned char>(*p)]; - } - - return (p - str); -} - -// Third overload: stop when reaching str.size(). 
-// -inline -ustring::size_type utf8_byte_offset(const std::string& str, ustring::size_type offset) -{ - return utf8_byte_offset(str.data(), offset, str.size()); -} - -// Takes UTF-8 character offset and count in ci and cn. -// Returns the byte offset and count in i and n. -// -struct Utf8SubstrBounds -{ - ustring::size_type i; - ustring::size_type n; - - Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn) - : - i (utf8_byte_offset(str, ci)), - n (ustring::npos) - { - if(i != ustring::npos) - n = utf8_byte_offset(str.data() + i, cn, str.size() - i); - } -}; - -// Converts byte offset to UTF-8 character offset. -inline -ustring::size_type utf8_char_offset(const std::string& str, ustring::size_type offset) -{ - if(offset == ustring::npos) - return ustring::npos; - - const char *const pdata = str.data(); - return g_utf8_pointer_to_offset(pdata, pdata + offset); -} - - -// Helper to implement ustring::find_first_of() and find_first_not_of(). -// Returns the UTF-8 character offset, or ustring::npos if not found. 
-static -ustring::size_type utf8_find_first_of(const std::string& str, ustring::size_type offset, - const char* utf8_match, long utf8_match_size, - bool find_not_of) -{ - const ustring::size_type byte_offset = utf8_byte_offset(str, offset); - if(byte_offset == ustring::npos) - return ustring::npos; - - long ucs4_match_size = 0; - const Glib::ScopedPtr<gunichar> ucs4_match - (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size)); - - const gunichar *const match_begin = ucs4_match.get(); - const gunichar *const match_end = match_begin + ucs4_match_size; - - const char *const str_begin = str.data(); - const char *const str_end = str_begin + str.size(); - - for(const char* pstr = str_begin + byte_offset; - pstr < str_end; - pstr = g_utf8_next_char(pstr)) - { - const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr)); - - if((pfound != match_end) != find_not_of) - return offset; - - ++offset; - } - - return ustring::npos; -} - -// Helper to implement ustring::find_last_of() and find_last_not_of(). -// Returns the UTF-8 character offset, or ustring::npos if not found. -static -ustring::size_type utf8_find_last_of(const std::string& str, ustring::size_type offset, - const char* utf8_match, long utf8_match_size, - bool find_not_of) -{ - long ucs4_match_size = 0; - const Glib::ScopedPtr<gunichar> ucs4_match - (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size)); - - const gunichar *const match_begin = ucs4_match.get(); - const gunichar *const match_end = match_begin + ucs4_match_size; - - const char *const str_begin = str.data(); - const char* pstr = str_begin; - - // Set pstr one byte beyond the actual start position. - const ustring::size_type byte_offset = utf8_byte_offset(str, offset); - pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size(); - - while(pstr > str_begin) - { - // Move to previous character. 
- do - --pstr; - while((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80); - - const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr)); - - if((pfound != match_end) != find_not_of) - return g_utf8_pointer_to_offset(str_begin, pstr); - } - - return ustring::npos; -} - -} // anonymous namespace - - -namespace Glib -{ - -#ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS -// Initialize static member here, -// because the compiler did not allow us do it inline. -const ustring::size_type ustring::npos = std::string::npos; -#endif - -/* - * We need our own version of g_utf8_get_char(), because the std::string - * iterator is not necessarily a plain pointer (it's in fact not in GCC's - * libstdc++-v3). Copying the UTF-8 data into a temporary buffer isn't an - * option since this operation is quite time critical. The implementation - * is quite different from g_utf8_get_char() -- both more generic and likely - * faster. - * - * By looking at the first byte of a UTF-8 character one can determine the - * number of bytes used. GLib offers the g_utf8_skip[] array for this purpose, - * but accessing this global variable would, on IA32 at least, introduce - * a function call to fetch the Global Offset Table, plus two levels of - * indirection in order to read the value. Even worse, fetching the GOT is - * always done right at the start of the function instead of the branch that - * actually uses the variable. - * - * Fortunately, there's a better way to get the byte count. 
As this table - * shows, there's a nice regular pattern in the UTF-8 encoding scheme: - * - * 0x00000000 - 0x0000007F: 0xxxxxxx - * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx - * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx - * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - * - * Except for the single byte case, the number of leading 1-bits equals the - * byte count. All that is needed is to shift the first byte to the left - * until bit 7 becomes 0. Naturally, doing so requires a loop -- but since - * we already have one, no additional cost is introduced. This shifting can - * further be combined with the computation of the bitmask needed to eliminate - * the leading length bits, thus saving yet another register. - * - * Note: If you change this code, it is advisable to also review what the - * compiler makes of it in the assembler output. Except for some pointless - * register moves, the generated code is sufficiently close to the optimum - * with GCC 4.1.2 on x86_64. 
- */ -gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) -{ - unsigned int result = static_cast<unsigned char>(*pos); - - if((result & 0x80) != 0) - { - unsigned int mask = 0x40; - - do - { - result <<= 6; - const unsigned int c = static_cast<unsigned char>(*++pos); - mask <<= 5; - result += c - 0x80; - } - while((result & mask) != 0); - - result &= mask - 1; - } - - return result; -} - - -/**** Glib::ustring ********************************************************/ - -ustring::ustring() -: - string_ () -{} - -ustring::ustring(const ustring& other) -: - string_ (other.string_) -{} - -ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n) -: - string_ () -{ - const Utf8SubstrBounds bounds (src.string_, i, n); - string_.assign(src.string_, bounds.i, bounds.n); -} - -ustring::ustring(const char* src, ustring::size_type n) -: - string_ (src, utf8_byte_offset(src, n)) -{} - -ustring::ustring(const char* src) -: - string_ (src) -{} - -ustring::ustring(ustring::size_type n, gunichar uc) -: - string_ () -{ - if(uc < 0x80) - { - // Optimize the probably most common case. 
- string_.assign(n, static_cast<char>(uc)); - } - else - { - const UnicharToUtf8 conv (uc); - string_.reserve(n * conv.len); - - for(; n > 0; --n) - string_.append(conv.buf, conv.len); - } -} - -ustring::ustring(ustring::size_type n, char c) -: - string_ (n, c) -{} - -ustring::ustring(const std::string& src) -: - string_ (src) -{} - -ustring::~ustring() -{} - -void ustring::swap(ustring& other) -{ - string_.swap(other.string_); -} - - -/**** Glib::ustring::operator=() *******************************************/ - -ustring& ustring::operator=(const ustring& other) -{ - string_ = other.string_; - return *this; -} - -ustring& ustring::operator=(const std::string& src) -{ - string_ = src; - return *this; -} - -ustring& ustring::operator=(const char* src) -{ - string_ = src; - return *this; -} - -ustring& ustring::operator=(gunichar uc) -{ - const UnicharToUtf8 conv (uc); - string_.assign(conv.buf, conv.len); - return *this; -} - -ustring& ustring::operator=(char c) -{ - string_ = c; - return *this; -} - - -/**** Glib::ustring::assign() **********************************************/ - -ustring& ustring::assign(const ustring& src) -{ - string_ = src.string_; - return *this; -} - -ustring& ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n) -{ - const Utf8SubstrBounds bounds (src.string_, i, n); - string_.assign(src.string_, bounds.i, bounds.n); - return *this; -} - -ustring& ustring::assign(const char* src, ustring::size_type n) -{ - string_.assign(src, utf8_byte_offset(src, n)); - return *this; -} - -ustring& ustring::assign(const char* src) -{ - string_ = src; - return *this; -} - -ustring& ustring::assign(ustring::size_type n, gunichar uc) -{ - ustring temp (n, uc); - string_.swap(temp.string_); - return *this; -} - -ustring& ustring::assign(ustring::size_type n, char c) -{ - string_.assign(n, c); - return *this; -} - - -/**** Glib::ustring::operator+=() ******************************************/ - -ustring& ustring::operator+=(const 
ustring& src) -{ - string_ += src.string_; - return *this; -} - -ustring& ustring::operator+=(const char* src) -{ - string_ += src; - return *this; -} - -ustring& ustring::operator+=(gunichar uc) -{ - const UnicharToUtf8 conv (uc); - string_.append(conv.buf, conv.len); - return *this; -} - -ustring& ustring::operator+=(char c) -{ - string_ += c; - return *this; -} - - -/**** Glib::ustring::push_back() *******************************************/ - -void ustring::push_back(gunichar uc) -{ - const UnicharToUtf8 conv (uc); - string_.append(conv.buf, conv.len); -} - -void ustring::push_back(char c) -{ - string_ += c; -} - - -/**** Glib::ustring::append() **********************************************/ - -ustring& ustring::append(const ustring& src) -{ - string_ += src.string_; - return *this; -} - -ustring& ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n) -{ - const Utf8SubstrBounds bounds (src.string_, i, n); - string_.append(src.string_, bounds.i, bounds.n); - return *this; -} - -ustring& ustring::append(const char* src, ustring::size_type n) -{ - string_.append(src, utf8_byte_offset(src, n)); - return *this; -} - -ustring& ustring::append(const char* src) -{ - string_ += src; - return *this; -} - -ustring& ustring::append(ustring::size_type n, gunichar uc) -{ - string_.append(ustring(n, uc).string_); - return *this; -} - -ustring& ustring::append(ustring::size_type n, char c) -{ - string_.append(n, c); - return *this; -} - - -/**** Glib::ustring::insert() **********************************************/ - -ustring& ustring::insert(ustring::size_type i, const ustring& src) -{ - string_.insert(utf8_byte_offset(string_, i), src.string_); - return *this; -} - -ustring& ustring::insert(ustring::size_type i, const ustring& src, - ustring::size_type i2, ustring::size_type n) -{ - const Utf8SubstrBounds bounds2 (src.string_, i2, n); - string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n); - return *this; -} - -ustring& 
ustring::insert(ustring::size_type i, const char* src, ustring::size_type n) -{ - string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n)); - return *this; -} - -ustring& ustring::insert(ustring::size_type i, const char* src) -{ - string_.insert(utf8_byte_offset(string_, i), src); - return *this; -} - -ustring& ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc) -{ - string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_); - return *this; -} - -ustring& ustring::insert(ustring::size_type i, ustring::size_type n, char c) -{ - string_.insert(utf8_byte_offset(string_, i), n, c); - return *this; -} - -ustring::iterator ustring::insert(ustring::iterator p, gunichar uc) -{ - const size_type offset = p.base() - string_.begin(); - const UnicharToUtf8 conv (uc); - string_.insert(offset, conv.buf, conv.len); - return iterator(string_.begin() + offset); -} - -ustring::iterator ustring::insert(ustring::iterator p, char c) -{ - return iterator(string_.insert(p.base(), c)); -} - -void ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc) -{ - string_.insert(p.base() - string_.begin(), ustring(n, uc).string_); -} - -void ustring::insert(ustring::iterator p, ustring::size_type n, char c) -{ - string_.insert(p.base(), n, c); -} - - -/**** Glib::ustring::replace() *********************************************/ - -ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.replace(bounds.i, bounds.n, src.string_); - return *this; -} - -ustring& ustring::replace(ustring::size_type i, ustring::size_type n, - const ustring& src, ustring::size_type i2, ustring::size_type n2) -{ - const Utf8SubstrBounds bounds (string_, i, n); - const Utf8SubstrBounds bounds2 (src.string_, i2, n2); - string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n); - return *this; -} - -ustring& ustring::replace(ustring::size_type i, 
ustring::size_type n, - const char* src, ustring::size_type n2) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2)); - return *this; -} - -ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const char* src) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.replace(bounds.i, bounds.n, src); - return *this; -} - -ustring& ustring::replace(ustring::size_type i, ustring::size_type n, - ustring::size_type n2, gunichar uc) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_); - return *this; -} - -ustring& ustring::replace(ustring::size_type i, ustring::size_type n, - ustring::size_type n2, char c) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.replace(bounds.i, bounds.n, n2, c); - return *this; -} - -ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src) -{ - string_.replace(pbegin.base(), pend.base(), src.string_); - return *this; -} - -ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, - const char* src, ustring::size_type n) -{ - string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n)); - return *this; -} - -ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src) -{ - string_.replace(pbegin.base(), pend.base(), src); - return *this; -} - -ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, - ustring::size_type n, gunichar uc) -{ - string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_); - return *this; -} - -ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, - ustring::size_type n, char c) -{ - string_.replace(pbegin.base(), pend.base(), n, c); - return *this; -} - - -/**** Glib::ustring::erase() ***********************************************/ - -void ustring::clear() -{ - string_.erase(); -} - -ustring& 
ustring::erase(ustring::size_type i, ustring::size_type n) -{ - const Utf8SubstrBounds bounds (string_, i, n); - string_.erase(bounds.i, bounds.n); - return *this; -} - -ustring& ustring::erase() -{ - string_.erase(); - return *this; -} - -ustring::iterator ustring::erase(ustring::iterator p) -{ - ustring::iterator iter_end = p; - ++iter_end; - - return iterator(string_.erase(p.base(), iter_end.base())); -} - -ustring::iterator ustring::erase(ustring::iterator pbegin, ustring::iterator pend) -{ - return iterator(string_.erase(pbegin.base(), pend.base())); -} - - -/**** Glib::ustring::compare() *********************************************/ - -int ustring::compare(const ustring& rhs) const -{ - return g_utf8_collate(string_.c_str(), rhs.string_.c_str()); -} - -int ustring::compare(const char* rhs) const -{ - return g_utf8_collate(string_.c_str(), rhs); -} - -int ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const -{ - return ustring(*this, i, n).compare(rhs); -} - -int ustring::compare(ustring::size_type i, ustring::size_type n, - const ustring& rhs, ustring::size_type i2, ustring::size_type n2) const -{ - return ustring(*this, i, n).compare(ustring(rhs, i2, n2)); -} - -int ustring::compare(ustring::size_type i, ustring::size_type n, - const char* rhs, ustring::size_type n2) const -{ - return ustring(*this, i, n).compare(ustring(rhs, n2)); -} - -int ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const -{ - return ustring(*this, i, n).compare(rhs); -} - - -/**** Glib::ustring -- index access ****************************************/ - -ustring::value_type ustring::operator[](ustring::size_type i) const -{ - return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i)); -} - -ustring::value_type ustring::at(ustring::size_type i) const -{ - const size_type byte_offset = utf8_byte_offset(string_, i); - - // Throws std::out_of_range if the index is invalid. 
- return g_utf8_get_char(&string_.at(byte_offset)); -} - - -/**** Glib::ustring -- iterator access *************************************/ - -ustring::iterator ustring::begin() -{ - return iterator(string_.begin()); -} - -ustring::iterator ustring::end() -{ - return iterator(string_.end()); -} - -ustring::const_iterator ustring::begin() const -{ - return const_iterator(string_.begin()); -} - -ustring::const_iterator ustring::end() const -{ - return const_iterator(string_.end()); -} - -ustring::reverse_iterator ustring::rbegin() -{ - return reverse_iterator(iterator(string_.end())); -} - -ustring::reverse_iterator ustring::rend() -{ - return reverse_iterator(iterator(string_.begin())); -} - -ustring::const_reverse_iterator ustring::rbegin() const -{ - return const_reverse_iterator(const_iterator(string_.end())); -} - -ustring::const_reverse_iterator ustring::rend() const -{ - return const_reverse_iterator(const_iterator(string_.begin())); -} - - -/**** Glib::ustring::find() ************************************************/ - -ustring::size_type ustring::find(const ustring& str, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i))); -} - -ustring::size_type ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const -{ - return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i), - utf8_byte_offset(str, n))); -} - -ustring::size_type ustring::find(const char* str, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i))); -} - -ustring::size_type ustring::find(gunichar uc, ustring::size_type i) const -{ - const UnicharToUtf8 conv (uc); - return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len)); -} - -ustring::size_type ustring::find(char c, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i))); -} - - 
-/**** Glib::ustring::rfind() ***********************************************/ - -ustring::size_type ustring::rfind(const ustring& str, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i))); -} - -ustring::size_type ustring::rfind(const char* str, ustring::size_type i, - ustring::size_type n) const -{ - return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i), - utf8_byte_offset(str, n))); -} - -ustring::size_type ustring::rfind(const char* str, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i))); -} - -ustring::size_type ustring::rfind(gunichar uc, ustring::size_type i) const -{ - const UnicharToUtf8 conv (uc); - return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len)); -} - -ustring::size_type ustring::rfind(char c, ustring::size_type i) const -{ - return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i))); -} - - -/**** Glib::ustring::find_first_of() ***************************************/ - -ustring::size_type ustring::find_first_of(const ustring& match, ustring::size_type i) const -{ - return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false); -} - -ustring::size_type ustring::find_first_of(const char* match, - ustring::size_type i, ustring::size_type n) const -{ - return utf8_find_first_of(string_, i, match, n, false); -} - -ustring::size_type ustring::find_first_of(const char* match, ustring::size_type i) const -{ - return utf8_find_first_of(string_, i, match, -1, false); -} - -ustring::size_type ustring::find_first_of(gunichar uc, ustring::size_type i) const -{ - return find(uc, i); -} - -ustring::size_type ustring::find_first_of(char c, ustring::size_type i) const -{ - return find(c, i); -} - - -/**** Glib::ustring::find_last_of() ****************************************/ - -ustring::size_type 
ustring::find_last_of(const ustring& match, ustring::size_type i) const -{ - return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false); -} - -ustring::size_type ustring::find_last_of(const char* match, - ustring::size_type i, ustring::size_type n) const -{ - return utf8_find_last_of(string_, i, match, n, false); -} - -ustring::size_type ustring::find_last_of(const char* match, ustring::size_type i) const -{ - return utf8_find_last_of(string_, i, match, -1, false); -} - -ustring::size_type ustring::find_last_of(gunichar uc, ustring::size_type i) const -{ - return rfind(uc, i); -} - -ustring::size_type ustring::find_last_of(char c, ustring::size_type i) const -{ - return rfind(c, i); -} - - -/**** Glib::ustring::find_first_not_of() ***********************************/ - -ustring::size_type ustring::find_first_not_of(const ustring& match, ustring::size_type i) const -{ - return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true); -} - -ustring::size_type ustring::find_first_not_of(const char* match, - ustring::size_type i, ustring::size_type n) const -{ - return utf8_find_first_of(string_, i, match, n, true); -} - -ustring::size_type ustring::find_first_not_of(const char* match, ustring::size_type i) const -{ - return utf8_find_first_of(string_, i, match, -1, true); -} - -// Unfortunately, all of the find_*_not_of() methods for single -// characters need their own special implementation. 
-// -ustring::size_type ustring::find_first_not_of(gunichar uc, ustring::size_type i) const -{ - const size_type bi = utf8_byte_offset(string_, i); - if(bi != npos) - { - const char *const pbegin = string_.data(); - const char *const pend = pbegin + string_.size(); - - for(const char* p = pbegin + bi; - p < pend; - p = g_utf8_next_char(p), ++i) - { - if(g_utf8_get_char(p) != uc) - return i; - } - } - return npos; -} - -ustring::size_type ustring::find_first_not_of(char c, ustring::size_type i) const -{ - const size_type bi = utf8_byte_offset(string_, i); - if(bi != npos) - { - const char *const pbegin = string_.data(); - const char *const pend = pbegin + string_.size(); - - for(const char* p = pbegin + bi; - p < pend; - p = g_utf8_next_char(p), ++i) - { - if(*p != c) - return i; - } - } - return npos; -} - - -/**** Glib::ustring::find_last_not_of() ************************************/ - -ustring::size_type ustring::find_last_not_of(const ustring& match, ustring::size_type i) const -{ - return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true); -} - -ustring::size_type ustring::find_last_not_of(const char* match, - ustring::size_type i, ustring::size_type n) const -{ - return utf8_find_last_of(string_, i, match, n, true); -} - -ustring::size_type ustring::find_last_not_of(const char* match, ustring::size_type i) const -{ - return utf8_find_last_of(string_, i, match, -1, true); -} - -// Unfortunately, all of the find_*_not_of() methods for single -// characters need their own special implementation. 
-// -ustring::size_type ustring::find_last_not_of(gunichar uc, ustring::size_type i) const -{ - const char *const pbegin = string_.data(); - const char *const pend = pbegin + string_.size(); - size_type i_cur = 0; - size_type i_found = npos; - - for(const char* p = pbegin; - p < pend && i_cur <= i; - p = g_utf8_next_char(p), ++i_cur) - { - if(g_utf8_get_char(p) != uc) - i_found = i_cur; - } - return i_found; -} - -ustring::size_type ustring::find_last_not_of(char c, ustring::size_type i) const -{ - const char *const pbegin = string_.data(); - const char *const pend = pbegin + string_.size(); - size_type i_cur = 0; - size_type i_found = npos; - - for(const char* p = pbegin; - p < pend && i_cur <= i; - p = g_utf8_next_char(p), ++i_cur) - { - if(*p != c) - i_found = i_cur; - } - return i_found; -} - - -/**** Glib::ustring -- get size and resize *********************************/ - -bool ustring::empty() const -{ - return string_.empty(); -} - -ustring::size_type ustring::size() const -{ - const char *const pdata = string_.data(); - return g_utf8_pointer_to_offset(pdata, pdata + string_.size()); -} - -ustring::size_type ustring::length() const -{ - const char *const pdata = string_.data(); - return g_utf8_pointer_to_offset(pdata, pdata + string_.size()); -} - -ustring::size_type ustring::bytes() const -{ - return string_.size(); -} - -ustring::size_type ustring::capacity() const -{ - return string_.capacity(); -} - -ustring::size_type ustring::max_size() const -{ - return string_.max_size(); -} - -void ustring::resize(ustring::size_type n, gunichar uc) -{ - const size_type size_now = size(); - if(n < size_now) - erase(n, npos); - else if(n > size_now) - append(n - size_now, uc); -} - -void ustring::resize(ustring::size_type n, char c) -{ - const size_type size_now = size(); - if(n < size_now) - erase(n, npos); - else if(n > size_now) - string_.append(n - size_now, c); -} - -void ustring::reserve(ustring::size_type n) -{ - string_.reserve(n); -} - - -/**** Glib::ustring 
-- C string access *************************************/ - -const char* ustring::data() const -{ - return string_.data(); -} - -const char* ustring::c_str() const -{ - return string_.c_str(); -} - -// Note that copy() requests UTF-8 character offsets as -// parameters, but returns the number of copied bytes. -// -ustring::size_type ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const -{ - const Utf8SubstrBounds bounds (string_, i, n); - return string_.copy(dest, bounds.n, bounds.i); -} - - -/**** Glib::ustring -- UTF-8 utilities *************************************/ - -bool ustring::validate() const -{ - return (g_utf8_validate(string_.data(), string_.size(), 0) != 0); -} - -bool ustring::validate(ustring::iterator& first_invalid) -{ - const char *const pdata = string_.data(); - const char* valid_end = pdata; - const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end); - - first_invalid = iterator(string_.begin() + (valid_end - pdata)); - return (is_valid != 0); -} - -bool ustring::validate(ustring::const_iterator& first_invalid) const -{ - const char *const pdata = string_.data(); - const char* valid_end = pdata; - const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end); - - first_invalid = const_iterator(string_.begin() + (valid_end - pdata)); - return (is_valid != 0); -} - -bool ustring::is_ascii() const -{ - const char* p = string_.data(); - const char *const pend = p + string_.size(); - - for(; p != pend; ++p) - { - if((static_cast<unsigned char>(*p) & 0x80u) != 0) - return false; - } - - return true; -} - -ustring ustring::normalize(NormalizeMode mode) const -{ - const ScopedPtr<char> buf (g_utf8_normalize(string_.data(), string_.size(), - static_cast<GNormalizeMode>(int(mode)))); - return ustring(buf.get()); -} - -ustring ustring::uppercase() const -{ - const ScopedPtr<char> buf (g_utf8_strup(string_.data(), string_.size())); - return ustring(buf.get()); -} - -ustring ustring::lowercase() const -{ - 
const ScopedPtr<char> buf (g_utf8_strdown(string_.data(), string_.size())); - return ustring(buf.get()); -} - -ustring ustring::casefold() const -{ - const ScopedPtr<char> buf (g_utf8_casefold(string_.data(), string_.size())); - return ustring(buf.get()); -} - -std::string ustring::collate_key() const -{ - const ScopedPtr<char> buf (g_utf8_collate_key(string_.data(), string_.size())); - return std::string(buf.get()); -} - -std::string ustring::casefold_collate_key() const -{ - char *const casefold_buf = g_utf8_casefold(string_.data(), string_.size()); - char *const key_buf = g_utf8_collate_key(casefold_buf, -1); - g_free(casefold_buf); - return std::string(ScopedPtr<char>(key_buf).get()); -} - -/**** Glib::ustring -- Message formatting **********************************/ - -// static -ustring ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv) -{ - std::string::size_type result_size = fmt.raw().size(); - - // Guesstimate the final string size. - for (int i = 0; i < argc; ++i) - result_size += argv[i]->raw().size(); - - std::string result; - result.reserve(result_size); - - const char* const pfmt = fmt.raw().c_str(); - const char* start = pfmt; - - while (const char* const stop = std::strchr(start, '%')) - { - if (stop[1] == '%') - { - result.append(start, stop - start + 1); - start = stop + 2; - } - else - { - const int index = Ascii::digit_value(stop[1]) - 1; - - if (index >= 0 && index < argc) - { - result.append(start, stop - start); - result += argv[index]->raw(); - start = stop + 2; - } - else - { - const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1); - - // Copy invalid substitutions literally to the output. 
- result.append(start, next - start); - - g_warning("invalid substitution \"%s\" in fmt string \"%s\"", - result.c_str() + result.size() - (next - stop), pfmt); - start = next; - } - } - } - - result.append(start, pfmt + fmt.raw().size() - start); - - return result; -} - -/**** Glib::ustring::SequenceToString **************************************/ - -ustring::SequenceToString<Glib::ustring::iterator,gunichar> - ::SequenceToString(Glib::ustring::iterator pbegin, Glib::ustring::iterator pend) -: - std::string(pbegin.base(), pend.base()) -{} - -ustring::SequenceToString<Glib::ustring::const_iterator,gunichar> - ::SequenceToString(Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend) -: - std::string(pbegin.base(), pend.base()) -{} - -/**** Glib::ustring::FormatStream ******************************************/ - -ustring::FormatStream::FormatStream() -: - stream_ () -{} - -ustring::FormatStream::~FormatStream() -{} - -ustring ustring::FormatStream::to_string() const -{ - GError* error = 0; - -#ifdef GLIBMM_HAVE_WIDE_STREAM - const std::wstring str = stream_.str(); - -# if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 - // Avoid going through iconv if wchar_t always contains UCS-4. - glong n_bytes = 0; - const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(str.data()), - str.size(), 0, &n_bytes, &error)); -# elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 - // Avoid going through iconv if wchar_t always contains UTF-16. 
- glong n_bytes = 0; - const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(str.data()), - str.size(), 0, &n_bytes, &error)); -# else - gsize n_bytes = 0; - const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(str.data()), - str.size() * sizeof(std::wstring::value_type), - "UTF-8", "WCHAR_T", 0, &n_bytes, &error)); -# endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */ - -#else /* !GLIBMM_HAVE_WIDE_STREAM */ - const std::string str = stream_.str(); - - gsize n_bytes = 0; - const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error)); -#endif /* !GLIBMM_HAVE_WIDE_STREAM */ - - if (error) - { - Glib::Error::throw_exception(error); - } - - return ustring(buf.get(), buf.get() + n_bytes); -} - -/**** Glib::ustring -- stream I/O operators ********************************/ - -std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string) -{ - std::string str; - is >> str; - - GError* error = 0; - gsize n_bytes = 0; - const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error)); - - if (error) - { - Glib::Error::throw_exception(error); - } - - utf8_string.assign(buf.get(), buf.get() + n_bytes); - - return is; -} - -std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string) -{ - GError* error = 0; - const ScopedPtr<char> buf (g_locale_from_utf8(utf8_string.raw().data(), - utf8_string.raw().size(), 0, 0, &error)); - if (error) - { - Glib::Error::throw_exception(error); - } - - // This won't work if the string contains NUL characters. Unfortunately, - // std::ostream::write() ignores format flags, so we cannot use that. - // The only option would be to create a temporary std::string. However, - // even then GCC's libstdc++-v3 prints only the characters up to the first - // NUL. Given this, there doesn't seem much of a point in allowing NUL in - // formatted output. The semantics would be unclear anyway: what's the - // screen width of a NUL? 
- os << buf.get(); - - return os; -} - -#ifdef GLIBMM_HAVE_WIDE_STREAM - -std::wistream& operator>>(std::wistream& is, ustring& utf8_string) -{ - GError* error = 0; - - std::wstring wstr; - is >> wstr; - -#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 - // Avoid going through iconv if wchar_t always contains UCS-4. - glong n_bytes = 0; - const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(wstr.data()), - wstr.size(), 0, &n_bytes, &error)); -#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 - // Avoid going through iconv if wchar_t always contains UTF-16. - glong n_bytes = 0; - const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(wstr.data()), - wstr.size(), 0, &n_bytes, &error)); -#else - gsize n_bytes = 0; - const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(wstr.data()), - wstr.size() * sizeof(std::wstring::value_type), - "UTF-8", "WCHAR_T", 0, &n_bytes, &error)); -#endif // !(__STDC_ISO_10646__ || G_OS_WIN32) - - if (error) - { - Glib::Error::throw_exception(error); - } - - utf8_string.assign(buf.get(), buf.get() + n_bytes); - - return is; -} - -std::wostream& operator<<(std::wostream& os, const ustring& utf8_string) -{ - GError* error = 0; - -#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4 - // Avoid going through iconv if wchar_t always contains UCS-4. - const ScopedPtr<gunichar> buf (g_utf8_to_ucs4(utf8_string.raw().data(), - utf8_string.raw().size(), 0, 0, &error)); -#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2 - // Avoid going through iconv if wchar_t always contains UTF-16. - const ScopedPtr<gunichar2> buf (g_utf8_to_utf16(utf8_string.raw().data(), - utf8_string.raw().size(), 0, 0, &error)); -#else - // TODO: For some reason the conversion from UTF-8 to WCHAR_T doesn't work - // with g_convert(), while iconv on the command line handles it just fine. - // Maybe a bug in GLib? 
- const ScopedPtr<char> buf (g_convert(utf8_string.raw().data(), utf8_string.raw().size(), - "WCHAR_T", "UTF-8", 0, 0, &error)); -#endif // !(__STDC_ISO_10646__ || G_OS_WIN32) - - if (error) - { - Glib::Error::throw_exception(error); - } - - // This won't work if the string contains NUL characters. Unfortunately, - // std::wostream::write() ignores format flags, so we cannot use that. - // The only option would be to create a temporary std::wstring. However, - // even then GCC's libstdc++-v3 prints only the characters up to the first - // NUL. Given this, there doesn't seem much of a point in allowing NUL in - // formatted output. The semantics would be unclear anyway: what's the - // screen width of a NUL? - os << reinterpret_cast<wchar_t*>(buf.get()); - - return os; -} - -#endif /* GLIBMM_HAVE_WIDE_STREAM */ - -} // namespace Glib Deleted: trunk/src/libustring/xmlwrappstring.h =================================================================== --- trunk/src/libustring/xmlwrappstring.h 2012-03-20 14:05:34 UTC (rev 204) +++ trunk/src/libustring/xmlwrappstring.h 2012-03-20 14:10:29 UTC (rev 205) @@ -1,1615 +0,0 @@ -// -*- c++ -*- -#ifndef _GLIBMM_USTRING_H -#define _GLIBMM_USTRING_H - -/* $Id$ */ - -/* Copyright (C) 2002 The gtkmm Development Team - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <glibmmconfig.h> -#include <glibmm/unicode.h> -#include <glib.h> - -#include <iosfwd> -#include <iterator> -#include <sstream> -#include <string> -#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS -#include <cstddef> /* for ptrdiff_t */ -#endif - -namespace Glib -{ - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS - -template <class T> -struct IteratorTraits -{ - typedef typename T::iterator_category iterator_category; - typedef typename T::value_type value_type; - typedef typename T::difference_type difference_type; - typedef typename T::pointer pointer; - typedef typename T::reference reference; -}; - -template <class T> -struct IteratorTraits<T*> -{ - typedef std::random_access_iterator_tag iterator_category; - typedef T value_type; - typedef ptrdiff_t difference_type; - typedef T* pointer; - typedef T& reference; -}; - -template <class T> -struct IteratorTraits<const T*> -{ - typedef std::random_access_iterator_tag iterator_category; - typedef T value_type; - typedef ptrdiff_t difference_type; - typedef const T* pointer; - typedef const T& reference; -}; - -#endif /* GLIBMM_HAVE_STD_ITERATOR_TRAITS */ -#endif /* DOXYGEN_SHOULD_SKIP_THIS */ - - -/** The iterator type of Glib::ustring. - * Note this is not a random access iterator but a bidirectional one, - * since all index operations need to iterate over the UTF-8 data. Use - * std::advance() to move to a certain position. However, all of the - * relational operators are available: - * <tt>== != < > <= >=</tt> - * - * A writeable iterator isn't provided because: The number of bytes of - * the old UTF-8 character and the new one to write could be different. - * Therefore, any write operation would invalidate all other iterators - * pointing into the same string. 
- */ -template <class T> -class ustring_Iterator -{ -public: - typedef std::bidirectional_iterator_tag iterator_category; - typedef gunichar value_type; - typedef std::string::difference_type difference_type; - typedef value_type reference; - typedef void pointer; - - inline ustring_Iterator(); - inline ustring_Iterator(const ustring_Iterator<std::string::iterator>& other); - - inline value_type operator*() const; - - inline ustring_Iterator<T> & operator++(); - inline const ustring_Iterator<T> operator++(int); - inline ustring_Iterator<T> & operator--(); - inline const ustring_Iterator<T> operator--(int); - - explicit inline ustring_Iterator(T pos); - inline T base() const; - -private: - T pos_; -}; - - -/** Extract a UCS-4 character from UTF-8 data. - * Convert a single UTF-8 (multibyte) character starting at @p pos to - * a UCS-4 wide character. This may read up to 6 bytes after the start - * position, depending on the UTF-8 character width. You have to make - * sure the source contains at least one valid UTF-8 character. - * - * This is mainly used by the implementation of Glib::ustring::iterator, - * but it might be useful as utility function if you prefer using - * std::string even for UTF-8 encoding. - */ -gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) G_GNUC_PURE; - - -/** Glib::ustring has much the same interface as std::string, but contains - * %Unicode characters encoded as UTF-8. - * - * @par About UTF-8 and ASCII - * @par - * The standard character set ANSI_X3.4-1968 -- more commonly known as - * ASCII -- is a subset of UTF-8. So, if you want to, you can use - * Glib::ustring without even thinking about UTF-8. - * @par - * Whenever ASCII is mentioned in this manual, we mean the @em real ASCII - * (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters. - * Glib::ustring can @em not be used with ASCII-compatible extended 8-bit - * charsets like ISO-8859-1. 
It's a good idea to avoid string literals - * containing non-ASCII characters (e.g. German umlauts) in source code, - * or at least you should use UTF-8 literals. - * @par - * You can find a detailed UTF-8 and %Unicode FAQ here: - * http://www.cl.cam.ac.uk/~mgk25/unicode.html - * - * @par Glib::ustring vs. std::string - * @par - * Glib::ustring has implicit type conversions to and from std::string. - * These conversions do @em not convert to/from the current locale (see - * Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that). You - * can always use std::string instead of Glib::ustring -- however, using - * std::string with multi-byte characters is quite hard. For instance, - * <tt>std::string::operator[]</tt> might return a byte in the middle of a - * character, and <tt>std::string::length()</tt> returns the number of bytes - * rather than characters. So don't do that without a good reason. - * @par - * In a perfect world the C++ Standard Library would contain a UTF-8 string - * class. Unfortunately, the C++ standard doesn't mention UTF-8 at all. Note - * that std::wstring is not a UTF-8 string class because it contains only - * fixed-width characters (where width could be 32, 16, or even 8 bits). - * - * @par Glib::ustring and stream input/output - * @par - * The stream I/O operators, that is operator<<() and operator>>(), perform - * implicit charset conversion to/from the current locale. If that's not - * what you intented (e.g. when writing to a configuration file that should - * always be UTF-8 encoded) use ustring::raw() to override this behaviour. 
- * @par - * If you're using std::ostringstream to build strings for display in the - * user interface, you must convert the result back to UTF-8 as shown below: - * @code - * std::ostringstream output; - * output.imbue(std::locale("")); // use the user's locale for this stream - * output << percentage << " % done"; - * label->set_text(Glib::locale_to_utf8(output.str())); - * @endcode - * - * @par Formatted output and internationalization - * @par - * The methods ustring::compose() and ustring::format() provide a convenient - * and powerful alternative to string streams, as shown in the example below. - * Refer to the method documentation of compose() and format() for details. - * @code - * using Glib::ustring; - * - * ustring message = ustring::compose("%1 is lower than 0x%2.", - * 12, ustring::format(std::hex, 16)); - * @endcode - * - * @par Implementation notes - * @par - * Glib::ustring does not inherit from std::string, because std::string was - * intended to be a final class. For instance, it does not have a virtual - * destructor. Also, a HAS-A relationship is more appropriate because - * ustring can't just enhance the std::string interface. Rather, it has to - * reimplement the interface so that all operations are based on characters - * instead of bytes. 
- */ -class ustring -{ -public: - typedef std::string::size_type size_type; - typedef std::string::difference_type difference_type; - - typedef gunichar value_type; - typedef gunichar & reference; - typedef const gunichar & const_reference; - - typedef ustring_Iterator<std::string::iterator> iterator; - typedef ustring_Iterator<std::string::const_iterator> const_iterator; - -#ifndef GLIBMM_HAVE_SUN_REVERSE_ITERATOR - - typedef std::reverse_iterator<iterator> reverse_iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - -#else - - typedef std::reverse_iterator<iterator, - iterator::iterator_category, - iterator::value_type, - iterator::reference, - iterator::pointer, - iterator::difference_type> reverse_iterator; - typedef std::reverse_iterator<const_iterator, - const_iterator::iterator_category, - const_iterator::value_type, - const_iterator::reference, - const_iterator::pointer, - const_iterator::difference_type> const_reverse_iterator; - -#endif /* GLIBMM_HAVE_SUN_REVERSE_ITERATOR */ - -#ifdef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS - static GLIBMM_API const size_type npos = std::string::npos; -#else - //The IRIX MipsPro compiler says "The indicated constant value is not known", - //so we need to initalize the static member data elsewhere. - static GLIBMM_API const size_type npos; -#endif - - /*! Default constructor, which creates an empty string. - */ - ustring(); - - ~ustring(); - - /*! Construct a ustring as a copy of another ustring. - * @param other A source string. - */ - ustring(const ustring& other); - - /*! Assign the value of another string to this string. - * @param other A source string. - */ - ustring& operator=(const ustring& other); - - /*! Swap contents with another string. - * @param other String to swap with. - */ - void swap(ustring& other); - - /*! Construct a ustring as a copy of another std::string. - * @param src A source <tt>std::string</tt> containing text encoded as UTF-8. 
- */ - ustring(const std::string& src); - - /*! Construct a ustring as a copy of a substring. - * @param src %Source ustring. - * @param i Index of first character to copy from. - * @param n Number of UTF-8 characters to copy (defaults to copying the remainder). - */ - ustring(const ustring& src, size_type i, size_type n=npos); - - /*! Construct a ustring as a partial copy of a C string. - * @param src %Source C string encoded as UTF-8. - * @param n Number of UTF-8 characters to copy. - */ - ustring(const char* src, size_type n); - - /*! Construct a ustring as a copy of a C string. - * @param src %Source C string encoded as UTF-8. - */ - ustring(const char* src); - - /*! Construct a ustring as multiple characters. - * @param n Number of characters. - * @param uc UCS-4 code point to use. - */ - ustring(size_type n, gunichar uc); - - /*! Construct a ustring as multiple characters. - * @param n Number of characters. - * @param c ASCII character to use. - */ - ustring(size_type n, char c); - - /*! Construct a ustring as a copy of a range. - * @param pbegin Start of range. - * @param pend End of range. - */ - template <class In> ustring(In pbegin, In pend); - - -//! @name Assign new contents. -//! @{ - - ustring& operator=(const std::string& src); - ustring& operator=(const char* src); - ustring& operator=(gunichar uc); - ustring& operator=(char c); - - ustring& assign(const ustring& src); - ustring& assign(const ustring& src, size_type i, size_type n); - ustring& assign(const char* src, size_type n); - ustring& assign(const char* src); - ustring& assign(size_type n, gunichar uc); - ustring& assign(size_type n, char c); - template <class In> ustring& assign(In pbegin, In pend); - -//! @} -//! @name Append to the string. -//! 
@{ - - ustring& operator+=(const ustring& src); - ustring& operator+=(const char* src); - ustring& operator+=(gunichar uc); - ustring& operator+=(char c); - void push_back(gunichar uc); - void push_back(char c); - - ustring& append(const ustring& src); - ustring& append(const ustring& src, size_type i, size_type n); - ustring& append(const char* src, size_type n); - ustring& append(const char* src); - ustring& append(size_type n, gunichar uc); - ustring& append(size_type n, char c); - template <class In> ustring& append(In pbegin, In pend); - -//! @} -//! @name Insert into the string. -//! @{ - - ustring& insert(size_type i, const ustring& src); - ustring& insert(size_type i, const ustring& src, size_type i2, size_type n); - ustring& insert(size_type i, const char* src, size_type n); - ustring& insert(size_type i, const char* src); - ustring& insert(size_type i, size_type n, gunichar uc); - ustring& insert(size_type i, size_type n, char c); - - iterator insert(iterator p, gunichar uc); - iterator insert(iterator p, char c); - void insert(iterator p, size_type n, gunichar uc); - void insert(iterator p, size_type n, char c); - template <class In> void insert(iterator p, In pbegin, In pend); - -//! @} -//! @name Replace sub-strings. -//! 
@{ - - ustring& replace(size_type i, size_type n, const ustring& src); - ustring& replace(size_type i, size_type n, const ustring& src, size_type i2, size_type n2); - ustring& replace(size_type i, size_type n, const char* src, size_type n2); - ustring& replace(size_type i, size_type n, const char* src); - ustring& replace(size_type i, size_type n, size_type n2, gunichar uc); - ustring& replace(size_type i, size_type n, size_type n2, char c); - - ustring& replace(iterator pbegin, iterator pend, const ustring& src); - ustring& replace(iterator pbegin, iterator pend, const char* src, size_type n); - ustring& replace(iterator pbegin, iterator pend, const char* src); - ustring& replace(iterator pbegin, iterator pend, size_type n, gunichar uc); - ustring& replace(iterator pbegin, iterator pend, size_type n, char c); - template <class In> ustring& replace(iterator pbegin, iterator pend, In pbegin2, In pend2); - -//! @} -//! @name Erase sub-strings. -//! @{ - - void clear(); - ustring& erase(size_type i, size_type n=npos); - ustring& erase(); - iterator erase(iterator p); - iterator erase(iterator pbegin, iterator pend); - -//! @} -//! @name Compare and collate. -//! @{ - - int compare(const ustring& rhs) const; - int compare(const char* rhs) const; - int compare(size_type i, size_type n, const ustring& rhs) const; - int compare(size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const; - int compare(size_type i, size_type n, const char* rhs, size_type n2) const; - int compare(size_type i, size_type n, const char* rhs) const; - - /*! Create a unique sorting key for the UTF-8 string. If you need to - * compare UTF-8 strings regularly, e.g. for sorted containers such as - * <tt>std::set<></tt>, you should consider creating a collate key first - * and compare this key instead of the actual string. 
- * - * The ustring::compare() methods as well as the relational operators - * <tt>== != < > <= >=</tt> are quite costly - * because they have to deal with %Unicode and the collation rules defined by - * the current locale. Converting both operands to UCS-4 is just the first - * of several costly steps involved when comparing ustrings. So be careful. - */ - std::string collate_key() const; - - /*! Create a unique key for the UTF-8 string that can be used for caseless - * sorting. <tt>ustr.casefold_collate_key()</tt> results in the same string - * as <tt>ustr.casefold().collate_key()</tt>, but the former is likely more - * efficient. - */ - std::string casefold_collate_key() const; - -//! @} -//! @name Extract characters and sub-strings. -//! @{ - - /*! No reference return; use replace() to write characters. */ - value_type operator[](size_type i) const; - - /*! No reference return; use replace() to write characters. @throw std::out_of_range */ - value_type at(size_type i) const; - - inline ustring substr(size_type i=0, size_type n=npos) const; - -//! @} -//! @name Access a sequence of characters. -//! @{ - - iterator begin(); - iterator end(); - const_iterator begin() const; - const_iterator end() const; - reverse_iterator rbegin(); - reverse_iterator rend(); - const_reverse_iterator rbegin() const; - const_reverse_iterator rend() const; - -//! @} -//! @name Find sub-strings. -//! @{ - - size_type find(const ustring& str, size_type i=0) const; - size_type find(const char* str, size_type i, size_type n) const; - size_type find(const char* str, size_type i=0) const; - size_type find(gunichar uc, size_type i=0) const; - size_type find(char c, size_type i=0) const; - - size_type rfind(const ustring& str, size_type i=npos) const; - size_type rfind(const char* str, size_type i, size_type n) const; - size_type rfind(const char* str, size_type i=npos) const; - size_type rfind(gunichar uc, size_type i=npos) const; - size_type rfind(char c, size_type i=npos) const; - -//! 
@} -//! @name Match against a set of characters. -//! @{ - - size_type find_first_of(const ustring& match, size_type i=0) const; - size_type find_first_of(const char* match, size_type i, size_type n) const; - size_type find_first_of(const char* match, size_type i=0) const; - size_type find_first_of(gunichar uc, size_type i=0) const; - size_type find_first_of(char c, size_type i=0) const; - - size_type find_last_of(const ustring& match, size_type i=npos) const; - size_type find_last_of(const char* match, size_type i, size_type n) const; - size_type find_last_of(const char* match, size_type i=npos) const; - size_type find_last_of(gunichar uc, size_type i=npos) const; - size_type find_last_of(char c, size_type i=npos) const; - - size_type find_first_not_of(const ustring& match, size_type i=0) const; - size_type find_first_not_of(const char* match, size_type i, size_type n) const; - size_type find_first_not_of(const char* match, size_type i=0) const; - size_type find_first_not_of(gunichar uc, size_type i=0) const; - size_type find_first_not_of(char c, size_type i=0) const; - - size_type find_last_not_of(const ustring& match, size_type i=npos) const; - size_type find_last_not_of(const char* match, size_type i, size_type n) const; - size_type find_last_not_of(const char* match, size_type i=npos) const; - size_type find_last_not_of(gunichar uc, size_type i=npos) const; - size_type find_last_not_of(char c, size_type i=npos) const; - -//! @} -//! @name Retrieve the string's size. -//! @{ - - /** Returns true if the string is empty. Equivalent to *this =... [truncated message content] |