[xmlwrapp-commits] SF.net SVN: xmlwrapp:[204] trunk/src/libustring

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Revision: 204
          http://xmlwrapp.svn.sourceforge.net/xmlwrapp/?rev=204&view=rev
Author:   tbrowder2
Date:     2012-03-20 14:05:34 +0000 (Tue, 20 Mar 2012)
Log Message:
-----------
rename for clarity

Added Paths:
-----------
    trunk/src/libustring/xmlwrappstring.cc
    trunk/src/libustring/xmlwrappstring.h

Removed Paths:
-------------
    trunk/src/libustring/ustring.cc
    trunk/src/libustring/ustring.h

Deleted: trunk/src/libustring/ustring.cc
===================================================================

--- trunk/src/libustring/ustring.cc	2012-03-20 13:49:06 UTC (rev 203)
+++ trunk/src/libustring/ustring.cc	2012-03-20 14:05:34 UTC (rev 204)
@@ -1,1418 +0,0 @@
-// -*- c++ -*-
-/* $Id$ */
-
-/* Copyright (C) 2002 The gtkmm Development Team
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <glibmmconfig.h>
-#include <glibmm/ustring.h>
-#include <glibmm/convert.h>
-#include <glibmm/error.h>
-#include <glibmm/utility.h>
-
-#include <algorithm>
-#include <iostream>
-#include <cstring>
-# include <stdexcept>
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-namespace
-{
-
-using Glib::ustring;
-
-// Little helper to make the conversion from gunichar to UTF-8 a one-liner.
-//
-struct UnicharToUtf8
-{
-  char                buf[6];
-  ustring::size_type  len;
-
-  explicit UnicharToUtf8(gunichar uc)
-    : len (g_unichar_to_utf8(uc, buf)) {}
-};
-
-
-// All utf8_*_offset() functions return npos if offset is out of range.
-// The caller should decide if npos is a valid argument and just marks
-// the whole string, or if it is not allowed (e.g. for start positions).
-// In the latter case std::out_of_range should be thrown, but usually
-// std::string will do that for us.
-
-// First overload: stop on '\0' character.
-static
-ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset)
-{
-  if(offset == ustring::npos)
-    return ustring::npos;
-
-  const char *const utf8_skip = g_utf8_skip;
-  const char* p = str;
-
-  for(; offset != 0; --offset)
-  {
-    const unsigned int c = static_cast<unsigned char>(*p);
-
-    if(c == 0)
-      return ustring::npos;
-
-    p += utf8_skip[c];
-  }
-
-  return (p - str);
-}
-
-// Second overload: stop when reaching maxlen.
-static
-ustring::size_type utf8_byte_offset(const char* str, ustring::size_type offset,
-                                    ustring::size_type maxlen)
-{
-  if(offset == ustring::npos)
-    return ustring::npos;
-
-  const char *const utf8_skip = g_utf8_skip;
-  const char *const pend = str + maxlen;
-  const char* p = str;
-
-  for(; offset != 0; --offset)
-  {
-    if(p >= pend)
-      return ustring::npos;
-
-    p += utf8_skip[static_cast<unsigned char>(*p)];
-  }
-
-  return (p - str);
-}
-
-// Third overload: stop when reaching str.size().
-//
-inline
-ustring::size_type utf8_byte_offset(const std::string& str, ustring::size_type offset)
-{
-  return utf8_byte_offset(str.data(), offset, str.size());
-}
-
-// Takes UTF-8 character offset and count in ci and cn.
-// Returns the byte offset and count in i and n.
-//
-struct Utf8SubstrBounds
-{
-  ustring::size_type i;
-  ustring::size_type n;
-
-  Utf8SubstrBounds(const std::string& str, ustring::size_type ci, ustring::size_type cn)
-  :
-    i (utf8_byte_offset(str, ci)),
-    n (ustring::npos)
-  {
-    if(i != ustring::npos)
-      n = utf8_byte_offset(str.data() + i, cn, str.size() - i);
-  }
-};
-
-// Converts byte offset to UTF-8 character offset.
-inline
-ustring::size_type utf8_char_offset(const std::string& str, ustring::size_type offset)
-{
-  if(offset == ustring::npos)
-    return ustring::npos;
-
-  const char *const pdata = str.data();
-  return g_utf8_pointer_to_offset(pdata, pdata + offset);
-}
-
-
-// Helper to implement ustring::find_first_of() and find_first_not_of().
-// Returns the UTF-8 character offset, or ustring::npos if not found.
-static
-ustring::size_type utf8_find_first_of(const std::string& str, ustring::size_type offset,
-                                      const char* utf8_match, long utf8_match_size,
-                                      bool find_not_of)
-{
-  const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
-  if(byte_offset == ustring::npos)
-    return ustring::npos;
-
-  long ucs4_match_size = 0;
-  const Glib::ScopedPtr<gunichar> ucs4_match
-      (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
-
-  const gunichar *const match_begin = ucs4_match.get();
-  const gunichar *const match_end   = match_begin + ucs4_match_size;
-
-  const char *const str_begin = str.data();
-  const char *const str_end   = str_begin + str.size();
-
-  for(const char* pstr = str_begin + byte_offset;
-      pstr < str_end;
-      pstr = g_utf8_next_char(pstr))
-  {
-    const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
-
-    if((pfound != match_end) != find_not_of)
-      return offset;
-
-    ++offset;
-  }
-
-  return ustring::npos;
-}
-
-// Helper to implement ustring::find_last_of() and find_last_not_of().
-// Returns the UTF-8 character offset, or ustring::npos if not found.
-static
-ustring::size_type utf8_find_last_of(const std::string& str, ustring::size_type offset,
-                                     const char* utf8_match, long utf8_match_size,
-                                     bool find_not_of)
-{
-  long ucs4_match_size = 0;
-  const Glib::ScopedPtr<gunichar> ucs4_match
-      (g_utf8_to_ucs4_fast(utf8_match, utf8_match_size, &ucs4_match_size));
-
-  const gunichar *const match_begin = ucs4_match.get();
-  const gunichar *const match_end   = match_begin + ucs4_match_size;
-
-  const char *const str_begin = str.data();
-  const char* pstr = str_begin;
-
-  // Set pstr one byte beyond the actual start position.
-  const ustring::size_type byte_offset = utf8_byte_offset(str, offset);
-  pstr += (byte_offset < str.size()) ? byte_offset + 1 : str.size();
-
-  while(pstr > str_begin)
-  {
-    // Move to previous character.
-    do
-      --pstr;
-    while((static_cast<unsigned char>(*pstr) & 0xC0u) == 0x80);
-
-    const gunichar *const pfound = std::find(match_begin, match_end, g_utf8_get_char(pstr));
-
-    if((pfound != match_end) != find_not_of)
-      return g_utf8_pointer_to_offset(str_begin, pstr);
-  }
-
-  return ustring::npos;
-}
-
-} // anonymous namespace
-
-
-namespace Glib
-{
-
-#ifndef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS
-// Initialize static member here, 
-// because the compiler did not allow us do it inline.
-const ustring::size_type ustring::npos = std::string::npos;
-#endif
-
-/*
- * We need our own version of g_utf8_get_char(), because the std::string
- * iterator is not necessarily a plain pointer (it's in fact not in GCC's
- * libstdc++-v3).  Copying the UTF-8 data into a temporary buffer isn't an
- * option since this operation is quite time critical.  The implementation
- * is quite different from g_utf8_get_char() -- both more generic and likely
- * faster.
- *
- * By looking at the first byte of a UTF-8 character one can determine the
- * number of bytes used.  GLib offers the g_utf8_skip[] array for this purpose,
- * but accessing this global variable would, on IA32 at least, introduce
- * a function call to fetch the Global Offset Table, plus two levels of
- * indirection in order to read the value.  Even worse, fetching the GOT is
- * always done right at the start of the function instead of the branch that
- * actually uses the variable.
- *
- * Fortunately, there's a better way to get the byte count.  As this table
- * shows, there's a nice regular pattern in the UTF-8 encoding scheme:
- *
- * 0x00000000 - 0x0000007F: 0xxxxxxx
- * 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx
- * 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
- * 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- * 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
- *
- * Except for the single byte case, the number of leading 1-bits equals the
- * byte count.  All that is needed is to shift the first byte to the left
- * until bit 7 becomes 0.  Naturally, doing so requires a loop -- but since
- * we already have one, no additional cost is introduced.  This shifting can
- * further be combined with the computation of the bitmask needed to eliminate
- * the leading length bits, thus saving yet another register.
- *
- * Note:  If you change this code, it is advisable to also review what the
- * compiler makes of it in the assembler output.  Except for some pointless
- * register moves, the generated code is sufficiently close to the optimum
- * with GCC 4.1.2 on x86_64.
- */
-gunichar get_unichar_from_std_iterator(std::string::const_iterator pos)
-{
-  unsigned int result = static_cast<unsigned char>(*pos);
-
-  if((result & 0x80) != 0)
-  {
-    unsigned int mask = 0x40;
-
-    do
-    {
-      result <<= 6;
-      const unsigned int c = static_cast<unsigned char>(*++pos);
-      mask   <<= 5;
-      result  += c - 0x80;
-    }
-    while((result & mask) != 0);
-
-    result &= mask - 1;
-  }
-
-  return result;
-}
-
-
-/**** Glib::ustring ********************************************************/
-
-ustring::ustring()
-:
-  string_ ()
-{}
-
-ustring::ustring(const ustring& other)
-:
-  string_ (other.string_)
-{}
-
-ustring::ustring(const ustring& src, ustring::size_type i, ustring::size_type n)
-:
-  string_ ()
-{
-  const Utf8SubstrBounds bounds (src.string_, i, n);
-  string_.assign(src.string_, bounds.i, bounds.n);
-}
-
-ustring::ustring(const char* src, ustring::size_type n)
-:
-  string_ (src, utf8_byte_offset(src, n))
-{}
-
-ustring::ustring(const char* src)
-:
-  string_ (src)
-{}
-
-ustring::ustring(ustring::size_type n, gunichar uc)
-:
-  string_ ()
-{
-  if(uc < 0x80)
-  {
-    // Optimize the probably most common case.
-    string_.assign(n, static_cast<char>(uc));
-  }
-  else
-  {
-    const UnicharToUtf8 conv (uc);
-    string_.reserve(n * conv.len);
-
-    for(; n > 0; --n)
-      string_.append(conv.buf, conv.len);
-  }
-}
-
-ustring::ustring(ustring::size_type n, char c)
-:
-  string_ (n, c)
-{}
-
-ustring::ustring(const std::string& src)
-:
-  string_ (src)
-{}
-
-ustring::~ustring()
-{}
-
-void ustring::swap(ustring& other)
-{
-  string_.swap(other.string_);
-}
-
-
-/**** Glib::ustring::operator=() *******************************************/
-
-ustring& ustring::operator=(const ustring& other)
-{
-  string_ = other.string_;
-  return *this;
-}
-
-ustring& ustring::operator=(const std::string& src)
-{
-  string_ = src;
-  return *this;
-}
-
-ustring& ustring::operator=(const char* src)
-{
-  string_ = src;
-  return *this;
-}
-
-ustring& ustring::operator=(gunichar uc)
-{
-  const UnicharToUtf8 conv (uc);
-  string_.assign(conv.buf, conv.len);
-  return *this;
-}
-
-ustring& ustring::operator=(char c)
-{
-  string_ = c;
-  return *this;
-}
-
-
-/**** Glib::ustring::assign() **********************************************/
-
-ustring& ustring::assign(const ustring& src)
-{
-  string_ = src.string_;
-  return *this;
-}
-
-ustring& ustring::assign(const ustring& src, ustring::size_type i, ustring::size_type n)
-{
-  const Utf8SubstrBounds bounds (src.string_, i, n);
-  string_.assign(src.string_, bounds.i, bounds.n);
-  return *this;
-}
-
-ustring& ustring::assign(const char* src, ustring::size_type n)
-{
-  string_.assign(src, utf8_byte_offset(src, n));
-  return *this;
-}
-
-ustring& ustring::assign(const char* src)
-{
-  string_ = src;
-  return *this;
-}
-
-ustring& ustring::assign(ustring::size_type n, gunichar uc)
-{
-  ustring temp (n, uc);
-  string_.swap(temp.string_);
-  return *this;
-}
-
-ustring& ustring::assign(ustring::size_type n, char c)
-{
-  string_.assign(n, c);
-  return *this;
-}
-
-
-/**** Glib::ustring::operator+=() ******************************************/
-
-ustring& ustring::operator+=(const ustring& src)
-{
-  string_ += src.string_;
-  return *this;
-}
-
-ustring& ustring::operator+=(const char* src)
-{
-  string_ += src;
-  return *this;
-}
-
-ustring& ustring::operator+=(gunichar uc)
-{
-  const UnicharToUtf8 conv (uc);
-  string_.append(conv.buf, conv.len);
-  return *this;
-}
-
-ustring& ustring::operator+=(char c)
-{
-  string_ += c;
-  return *this;
-}
-
-
-/**** Glib::ustring::push_back() *******************************************/
-
-void ustring::push_back(gunichar uc)
-{
-  const UnicharToUtf8 conv (uc);
-  string_.append(conv.buf, conv.len);
-}
-
-void ustring::push_back(char c)
-{
-  string_ += c;
-}
-
-
-/**** Glib::ustring::append() **********************************************/
-
-ustring& ustring::append(const ustring& src)
-{
-  string_ += src.string_;
-  return *this;
-}
-
-ustring& ustring::append(const ustring& src, ustring::size_type i, ustring::size_type n)
-{
-  const Utf8SubstrBounds bounds (src.string_, i, n);
-  string_.append(src.string_, bounds.i, bounds.n);
-  return *this;
-}
-
-ustring& ustring::append(const char* src, ustring::size_type n)
-{
-  string_.append(src, utf8_byte_offset(src, n));
-  return *this;
-}
-
-ustring& ustring::append(const char* src)
-{
-  string_ += src;
-  return *this;
-}
-
-ustring& ustring::append(ustring::size_type n, gunichar uc)
-{
-  string_.append(ustring(n, uc).string_);
-  return *this;
-}
-
-ustring& ustring::append(ustring::size_type n, char c)
-{
-  string_.append(n, c);
-  return *this;
-}
-
-
-/**** Glib::ustring::insert() **********************************************/
-
-ustring& ustring::insert(ustring::size_type i, const ustring& src)
-{
-  string_.insert(utf8_byte_offset(string_, i), src.string_);
-  return *this;
-}
-
-ustring& ustring::insert(ustring::size_type i, const ustring& src,
-                         ustring::size_type i2, ustring::size_type n)
-{
-  const Utf8SubstrBounds bounds2 (src.string_, i2, n);
-  string_.insert(utf8_byte_offset(string_, i), src.string_, bounds2.i, bounds2.n);
-  return *this;
-}
-
-ustring& ustring::insert(ustring::size_type i, const char* src, ustring::size_type n)
-{
-  string_.insert(utf8_byte_offset(string_, i), src, utf8_byte_offset(src, n));
-  return *this;
-}
-
-ustring& ustring::insert(ustring::size_type i, const char* src)
-{
-  string_.insert(utf8_byte_offset(string_, i), src);
-  return *this;
-}
-
-ustring& ustring::insert(ustring::size_type i, ustring::size_type n, gunichar uc)
-{
-  string_.insert(utf8_byte_offset(string_, i), ustring(n, uc).string_);
-  return *this;
-}
-
-ustring& ustring::insert(ustring::size_type i, ustring::size_type n, char c)
-{
-  string_.insert(utf8_byte_offset(string_, i), n, c);
-  return *this;
-}
-
-ustring::iterator ustring::insert(ustring::iterator p, gunichar uc)
-{
-  const size_type offset = p.base() - string_.begin();
-  const UnicharToUtf8 conv (uc);
-  string_.insert(offset, conv.buf, conv.len);
-  return iterator(string_.begin() + offset);
-}
-
-ustring::iterator ustring::insert(ustring::iterator p, char c)
-{
-  return iterator(string_.insert(p.base(), c));
-}
-
-void ustring::insert(ustring::iterator p, ustring::size_type n, gunichar uc)
-{
-  string_.insert(p.base() - string_.begin(), ustring(n, uc).string_);
-}
-
-void ustring::insert(ustring::iterator p, ustring::size_type n, char c)
-{
-  string_.insert(p.base(), n, c);
-}
-
-
-/**** Glib::ustring::replace() *********************************************/
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const ustring& src)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.replace(bounds.i, bounds.n, src.string_);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
-                          const ustring& src, ustring::size_type i2, ustring::size_type n2)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  const Utf8SubstrBounds bounds2 (src.string_, i2, n2);
-  string_.replace(bounds.i, bounds.n, src.string_, bounds2.i, bounds2.n);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
-                          const char* src, ustring::size_type n2)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.replace(bounds.i, bounds.n, src, utf8_byte_offset(src, n2));
-  return *this;
-}
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n, const char* src)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.replace(bounds.i, bounds.n, src);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
-                          ustring::size_type n2, gunichar uc)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.replace(bounds.i, bounds.n, ustring(n2, uc).string_);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::size_type i, ustring::size_type n,
-                          ustring::size_type n2, char c)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.replace(bounds.i, bounds.n, n2, c);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const ustring& src)
-{
-  string_.replace(pbegin.base(), pend.base(), src.string_);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
-                          const char* src, ustring::size_type n)
-{
-  string_.replace(pbegin.base(), pend.base(), src, utf8_byte_offset(src, n));
-  return *this;
-}
-
-ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend, const char* src)
-{
-  string_.replace(pbegin.base(), pend.base(), src);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
-                          ustring::size_type n, gunichar uc)
-{
-  string_.replace(pbegin.base(), pend.base(), ustring(n, uc).string_);
-  return *this;
-}
-
-ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
-                          ustring::size_type n, char c)
-{
-  string_.replace(pbegin.base(), pend.base(), n, c);
-  return *this;
-}
-
-
-/**** Glib::ustring::erase() ***********************************************/
-
-void ustring::clear()
-{
-  string_.erase();
-}
-
-ustring& ustring::erase(ustring::size_type i, ustring::size_type n)
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  string_.erase(bounds.i, bounds.n);
-  return *this;
-}
-
-ustring& ustring::erase()
-{
-  string_.erase();
-  return *this;
-}
-
-ustring::iterator ustring::erase(ustring::iterator p)
-{
-  ustring::iterator iter_end = p;
-  ++iter_end;
-
-  return iterator(string_.erase(p.base(), iter_end.base()));
-}
-
-ustring::iterator ustring::erase(ustring::iterator pbegin, ustring::iterator pend)
-{
-  return iterator(string_.erase(pbegin.base(), pend.base()));
-}
-
-
-/**** Glib::ustring::compare() *********************************************/
-
-int ustring::compare(const ustring& rhs) const
-{
-  return g_utf8_collate(string_.c_str(), rhs.string_.c_str());
-}
-
-int ustring::compare(const char* rhs) const
-{
-  return g_utf8_collate(string_.c_str(), rhs);
-}
-
-int ustring::compare(ustring::size_type i, ustring::size_type n, const ustring& rhs) const
-{
-  return ustring(*this, i, n).compare(rhs);
-}
-
-int ustring::compare(ustring::size_type i, ustring::size_type n,
-                     const ustring& rhs, ustring::size_type i2, ustring::size_type n2) const
-{
-  return ustring(*this, i, n).compare(ustring(rhs, i2, n2));
-}
-
-int ustring::compare(ustring::size_type i, ustring::size_type n,
-                     const char* rhs, ustring::size_type n2) const
-{
-  return ustring(*this, i, n).compare(ustring(rhs, n2));
-}
-
-int ustring::compare(ustring::size_type i, ustring::size_type n, const char* rhs) const
-{
-  return ustring(*this, i, n).compare(rhs);
-}
-
-
-/**** Glib::ustring -- index access ****************************************/
-
-ustring::value_type ustring::operator[](ustring::size_type i) const
-{
-  return g_utf8_get_char(g_utf8_offset_to_pointer(string_.data(), i));
-}
-
-ustring::value_type ustring::at(ustring::size_type i) const
-{
-  const size_type byte_offset = utf8_byte_offset(string_, i);
-
-  // Throws std::out_of_range if the index is invalid.
-  return g_utf8_get_char(&string_.at(byte_offset));
-}
-
-
-/**** Glib::ustring -- iterator access *************************************/
-
-ustring::iterator ustring::begin()
-{
-  return iterator(string_.begin());
-}
-
-ustring::iterator ustring::end()
-{
-  return iterator(string_.end());
-}
-
-ustring::const_iterator ustring::begin() const
-{
-  return const_iterator(string_.begin());
-}
-
-ustring::const_iterator ustring::end() const
-{
-  return const_iterator(string_.end());
-}
-
-ustring::reverse_iterator ustring::rbegin()
-{
-  return reverse_iterator(iterator(string_.end()));
-}
-
-ustring::reverse_iterator ustring::rend()
-{
-  return reverse_iterator(iterator(string_.begin()));
-}
-
-ustring::const_reverse_iterator ustring::rbegin() const
-{
-  return const_reverse_iterator(const_iterator(string_.end()));
-}
-
-ustring::const_reverse_iterator ustring::rend() const
-{
-  return const_reverse_iterator(const_iterator(string_.begin()));
-}
-
-
-/**** Glib::ustring::find() ************************************************/
-
-ustring::size_type ustring::find(const ustring& str, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.find(str.string_, utf8_byte_offset(string_, i)));
-}
-
-ustring::size_type ustring::find(const char* str, ustring::size_type i, ustring::size_type n) const
-{
-  return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i),
-                                                     utf8_byte_offset(str, n)));
-}
-
-ustring::size_type ustring::find(const char* str, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.find(str, utf8_byte_offset(string_, i)));
-}
-
-ustring::size_type ustring::find(gunichar uc, ustring::size_type i) const
-{
-  const UnicharToUtf8 conv (uc);
-  return utf8_char_offset(string_, string_.find(conv.buf, utf8_byte_offset(string_, i), conv.len));
-}
-
-ustring::size_type ustring::find(char c, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.find(c, utf8_byte_offset(string_, i)));
-}
-
-
-/**** Glib::ustring::rfind() ***********************************************/
-
-ustring::size_type ustring::rfind(const ustring& str, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.rfind(str.string_, utf8_byte_offset(string_, i)));
-}
-
-ustring::size_type ustring::rfind(const char* str, ustring::size_type i,
-                                  ustring::size_type n) const
-{
-  return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i),
-                                                      utf8_byte_offset(str, n)));
-}
-
-ustring::size_type ustring::rfind(const char* str, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.rfind(str, utf8_byte_offset(string_, i)));
-}
-
-ustring::size_type ustring::rfind(gunichar uc, ustring::size_type i) const
-{
-  const UnicharToUtf8 conv (uc);
-  return utf8_char_offset(string_, string_.rfind(conv.buf, utf8_byte_offset(string_, i), conv.len));
-}
-
-ustring::size_type ustring::rfind(char c, ustring::size_type i) const
-{
-  return utf8_char_offset(string_, string_.rfind(c, utf8_byte_offset(string_, i)));
-}
-
-
-/**** Glib::ustring::find_first_of() ***************************************/
-
-ustring::size_type ustring::find_first_of(const ustring& match, ustring::size_type i) const
-{
-  return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), false);
-}
-
-ustring::size_type ustring::find_first_of(const char* match,
-                                          ustring::size_type i, ustring::size_type n) const
-{
-  return utf8_find_first_of(string_, i, match, n, false);
-}
-
-ustring::size_type ustring::find_first_of(const char* match, ustring::size_type i) const
-{
-  return utf8_find_first_of(string_, i, match, -1, false);
-}
-
-ustring::size_type ustring::find_first_of(gunichar uc, ustring::size_type i) const
-{
-  return find(uc, i);
-}
-
-ustring::size_type ustring::find_first_of(char c, ustring::size_type i) const
-{
-  return find(c, i);
-}
-
-
-/**** Glib::ustring::find_last_of() ****************************************/
-
-ustring::size_type ustring::find_last_of(const ustring& match, ustring::size_type i) const
-{
-  return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), false);
-}
-
-ustring::size_type ustring::find_last_of(const char* match,
-                                         ustring::size_type i, ustring::size_type n) const
-{
-  return utf8_find_last_of(string_, i, match, n, false);
-}
-
-ustring::size_type ustring::find_last_of(const char* match, ustring::size_type i) const
-{
-  return utf8_find_last_of(string_, i, match, -1, false);
-}
-
-ustring::size_type ustring::find_last_of(gunichar uc, ustring::size_type i) const
-{
-  return rfind(uc, i);
-}
-
-ustring::size_type ustring::find_last_of(char c, ustring::size_type i) const
-{
-  return rfind(c, i);
-}
-
-
-/**** Glib::ustring::find_first_not_of() ***********************************/
-
-ustring::size_type ustring::find_first_not_of(const ustring& match, ustring::size_type i) const
-{
-  return utf8_find_first_of(string_, i, match.string_.data(), match.string_.size(), true);
-}
-
-ustring::size_type ustring::find_first_not_of(const char* match,
-                                              ustring::size_type i, ustring::size_type n) const
-{
-  return utf8_find_first_of(string_, i, match, n, true);
-}
-
-ustring::size_type ustring::find_first_not_of(const char* match, ustring::size_type i) const
-{
-  return utf8_find_first_of(string_, i, match, -1, true);
-}
-
-// Unfortunately, all of the find_*_not_of() methods for single
-// characters need their own special implementation.
-//
-ustring::size_type ustring::find_first_not_of(gunichar uc, ustring::size_type i) const
-{
-  const size_type bi = utf8_byte_offset(string_, i);
-  if(bi != npos)
-  {
-    const char *const pbegin = string_.data();
-    const char *const pend   = pbegin + string_.size();
-
-    for(const char* p = pbegin + bi;
-        p < pend;
-        p = g_utf8_next_char(p), ++i)
-    {
-      if(g_utf8_get_char(p) != uc)
-        return i;
-    }
-  }
-  return npos;
-}
-
-ustring::size_type ustring::find_first_not_of(char c, ustring::size_type i) const
-{
-  const size_type bi = utf8_byte_offset(string_, i);
-  if(bi != npos)
-  {
-    const char *const pbegin = string_.data();
-    const char *const pend   = pbegin + string_.size();
-
-    for(const char* p = pbegin + bi;
-        p < pend;
-        p = g_utf8_next_char(p), ++i)
-    {
-      if(*p != c)
-        return i;
-    }
-  }
-  return npos;
-}
-
-
-/**** Glib::ustring::find_last_not_of() ************************************/
-
-ustring::size_type ustring::find_last_not_of(const ustring& match, ustring::size_type i) const
-{
-  return utf8_find_last_of(string_, i, match.string_.data(), match.string_.size(), true);
-}
-
-ustring::size_type ustring::find_last_not_of(const char* match,
-                                             ustring::size_type i, ustring::size_type n) const
-{
-  return utf8_find_last_of(string_, i, match, n, true);
-}
-
-ustring::size_type ustring::find_last_not_of(const char* match, ustring::size_type i) const
-{
-  return utf8_find_last_of(string_, i, match, -1, true);
-}
-
-// Unfortunately, all of the find_*_not_of() methods for single
-// characters need their own special implementation.
-//
-ustring::size_type ustring::find_last_not_of(gunichar uc, ustring::size_type i) const
-{
-  const char *const pbegin = string_.data();
-  const char *const pend   = pbegin + string_.size();
-  size_type i_cur   = 0;
-  size_type i_found = npos;
-
-  for(const char* p = pbegin;
-      p < pend && i_cur <= i;
-      p = g_utf8_next_char(p), ++i_cur)
-  {
-    if(g_utf8_get_char(p) != uc)
-      i_found = i_cur;
-  }
-  return i_found;
-}
-
-ustring::size_type ustring::find_last_not_of(char c, ustring::size_type i) const
-{
-  const char *const pbegin = string_.data();
-  const char *const pend   = pbegin + string_.size();
-  size_type i_cur   = 0;
-  size_type i_found = npos;
-
-  for(const char* p = pbegin;
-      p < pend && i_cur <= i;
-      p = g_utf8_next_char(p), ++i_cur)
-  {
-    if(*p != c)
-      i_found = i_cur;
-  }
-  return i_found;
-}
-
-
-/**** Glib::ustring -- get size and resize *********************************/
-
-bool ustring::empty() const
-{
-  return string_.empty();
-}
-
-ustring::size_type ustring::size() const
-{
-  const char *const pdata = string_.data();
-  return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
-}
-
-ustring::size_type ustring::length() const
-{
-  const char *const pdata = string_.data();
-  return g_utf8_pointer_to_offset(pdata, pdata + string_.size());
-}
-
-ustring::size_type ustring::bytes() const
-{
-  return string_.size();
-}
-
-ustring::size_type ustring::capacity() const
-{
-  return string_.capacity();
-}
-
-ustring::size_type ustring::max_size() const
-{
-  return string_.max_size();
-}
-
-void ustring::resize(ustring::size_type n, gunichar uc)
-{
-  const size_type size_now = size();
-  if(n < size_now)
-    erase(n, npos);
-  else if(n > size_now)
-    append(n - size_now, uc);
-}
-
-void ustring::resize(ustring::size_type n, char c)
-{
-  const size_type size_now = size();
-  if(n < size_now)
-    erase(n, npos);
-  else if(n > size_now)
-    string_.append(n - size_now, c);
-}
-
-void ustring::reserve(ustring::size_type n)
-{
-  string_.reserve(n);
-}
-
-
-/**** Glib::ustring -- C string access *************************************/
-
-const char* ustring::data() const
-{
-  return string_.data();
-}
-
-const char* ustring::c_str() const
-{
-  return string_.c_str();
-}
-
-// Note that copy() requests UTF-8 character offsets as
-// parameters, but returns the number of copied bytes.
-//
-ustring::size_type ustring::copy(char* dest, ustring::size_type n, ustring::size_type i) const
-{
-  const Utf8SubstrBounds bounds (string_, i, n);
-  return string_.copy(dest, bounds.n, bounds.i);
-}
-
-
-/**** Glib::ustring -- UTF-8 utilities *************************************/
-
-bool ustring::validate() const
-{
-  return (g_utf8_validate(string_.data(), string_.size(), 0) != 0);
-}
-
-bool ustring::validate(ustring::iterator& first_invalid)
-{
-  const char *const pdata = string_.data();
-  const char* valid_end = pdata;
-  const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
-
-  first_invalid = iterator(string_.begin() + (valid_end - pdata));
-  return (is_valid != 0);
-}
-
-bool ustring::validate(ustring::const_iterator& first_invalid) const
-{
-  const char *const pdata = string_.data();
-  const char* valid_end = pdata;
-  const int is_valid = g_utf8_validate(pdata, string_.size(), &valid_end);
-
-  first_invalid = const_iterator(string_.begin() + (valid_end - pdata));
-  return (is_valid != 0);
-}
-
-bool ustring::is_ascii() const
-{
-  const char* p = string_.data();
-  const char *const pend = p + string_.size();
-
-  for(; p != pend; ++p)
-  {
-    if((static_cast<unsigned char>(*p) & 0x80u) != 0)
-      return false;
-  }
-
-  return true;
-}
-
-ustring ustring::normalize(NormalizeMode mode) const
-{
-  const ScopedPtr<char> buf (g_utf8_normalize(string_.data(), string_.size(),
-                                              static_cast<GNormalizeMode>(int(mode))));
-  return ustring(buf.get());
-}
-
-ustring ustring::uppercase() const
-{
-  const ScopedPtr<char> buf (g_utf8_strup(string_.data(), string_.size()));
-  return ustring(buf.get());
-}
-
-ustring ustring::lowercase() const
-{
-  const ScopedPtr<char> buf (g_utf8_strdown(string_.data(), string_.size()));
-  return ustring(buf.get());
-}
-
-ustring ustring::casefold() const
-{
-  const ScopedPtr<char> buf (g_utf8_casefold(string_.data(), string_.size()));
-  return ustring(buf.get());
-}
-
-std::string ustring::collate_key() const
-{
-  const ScopedPtr<char> buf (g_utf8_collate_key(string_.data(), string_.size()));
-  return std::string(buf.get());
-}
-
-std::string ustring::casefold_collate_key() const
-{
-  char *const casefold_buf = g_utf8_casefold(string_.data(), string_.size());
-  char *const key_buf      = g_utf8_collate_key(casefold_buf, -1);
-  g_free(casefold_buf);
-  return std::string(ScopedPtr<char>(key_buf).get());
-}
-
-/**** Glib::ustring -- Message formatting **********************************/
-
-// static
-ustring ustring::compose_argv(const Glib::ustring& fmt, int argc, const ustring* const* argv)
-{
-  std::string::size_type result_size = fmt.raw().size();
-
-  // Guesstimate the final string size.
-  for (int i = 0; i < argc; ++i)
-    result_size += argv[i]->raw().size();
-
-  std::string result;
-  result.reserve(result_size);
-
-  const char* const pfmt = fmt.raw().c_str();
-  const char* start = pfmt;
-
-  while (const char* const stop = std::strchr(start, '%'))
-  {
-    if (stop[1] == '%')
-    {
-      result.append(start, stop - start + 1);
-      start = stop + 2;
-    }
-    else
-    {
-      const int index = Ascii::digit_value(stop[1]) - 1;
-
-      if (index >= 0 && index < argc)
-      {
-        result.append(start, stop - start);
-        result += argv[index]->raw();
-        start = stop + 2;
-      }
-      else
-      {
-        const char* const next = (stop[1] != '\0') ? g_utf8_next_char(stop + 1) : (stop + 1);
-
-        // Copy invalid substitutions literally to the output.
-        result.append(start, next - start);
-
-        g_warning("invalid substitution \"%s\" in fmt string \"%s\"",
-                  result.c_str() + result.size() - (next - stop), pfmt);
-        start = next;
-      }
-    }
-  }
-
-  result.append(start, pfmt + fmt.raw().size() - start);
-
-  return result;
-}
-
-/**** Glib::ustring::SequenceToString **************************************/
-
-ustring::SequenceToString<Glib::ustring::iterator,gunichar>
-  ::SequenceToString(Glib::ustring::iterator pbegin, Glib::ustring::iterator pend)
-:
-  std::string(pbegin.base(), pend.base())
-{}
-
-ustring::SequenceToString<Glib::ustring::const_iterator,gunichar>
-  ::SequenceToString(Glib::ustring::const_iterator pbegin, Glib::ustring::const_iterator pend)
-:
-  std::string(pbegin.base(), pend.base())
-{}
-
-/**** Glib::ustring::FormatStream ******************************************/
-
-ustring::FormatStream::FormatStream()
-:
-  stream_ ()
-{}
-
-ustring::FormatStream::~FormatStream()
-{}
-
-ustring ustring::FormatStream::to_string() const
-{
-  GError* error = 0;
-
-#ifdef GLIBMM_HAVE_WIDE_STREAM
-  const std::wstring str = stream_.str();
-
-# if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
-  // Avoid going through iconv if wchar_t always contains UCS-4.
-  glong n_bytes = 0;
-  const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(str.data()),
-                                            str.size(), 0, &n_bytes, &error));
-# elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
-  // Avoid going through iconv if wchar_t always contains UTF-16.
-  glong n_bytes = 0;
-  const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(str.data()),
-                                             str.size(), 0, &n_bytes, &error));
-# else
-  gsize n_bytes = 0;
-  const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(str.data()),
-                                       str.size() * sizeof(std::wstring::value_type),
-                                       "UTF-8", "WCHAR_T", 0, &n_bytes, &error));
-# endif /* !(__STDC_ISO_10646__ || G_OS_WIN32) */
-
-#else /* !GLIBMM_HAVE_WIDE_STREAM */
-  const std::string str = stream_.str();
-
-  gsize n_bytes = 0;
-  const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
-#endif /* !GLIBMM_HAVE_WIDE_STREAM */
-
-  if (error)
-  {
-    Glib::Error::throw_exception(error);
-  }
-
-  return ustring(buf.get(), buf.get() + n_bytes);
-}
-
-/**** Glib::ustring -- stream I/O operators ********************************/
-
-std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string)
-{
-  std::string str;
-  is >> str;
-
-  GError* error = 0;
-  gsize n_bytes = 0;
-  const ScopedPtr<char> buf (g_locale_to_utf8(str.data(), str.size(), 0, &n_bytes, &error));
-
-  if (error)
-  {
-    Glib::Error::throw_exception(error);
-  }
-
-  utf8_string.assign(buf.get(), buf.get() + n_bytes);
-
-  return is;
-}
-
-std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string)
-{
-  GError* error = 0;
-  const ScopedPtr<char> buf (g_locale_from_utf8(utf8_string.raw().data(),
-                                                utf8_string.raw().size(), 0, 0, &error));
-  if (error)
-  {
-    Glib::Error::throw_exception(error);
-  }
-
-  // This won't work if the string contains NUL characters.  Unfortunately,
-  // std::ostream::write() ignores format flags, so we cannot use that.
-  // The only option would be to create a temporary std::string.  However,
-  // even then GCC's libstdc++-v3 prints only the characters up to the first
-  // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
-  // formatted output.  The semantics would be unclear anyway: what's the
-  // screen width of a NUL?
-  os << buf.get();
-
-  return os;
-}
-
-#ifdef GLIBMM_HAVE_WIDE_STREAM
-
-std::wistream& operator>>(std::wistream& is, ustring& utf8_string)
-{
-  GError* error = 0;
-
-  std::wstring wstr;
-  is >> wstr;
-
-#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
-  // Avoid going through iconv if wchar_t always contains UCS-4.
-  glong n_bytes = 0;
-  const ScopedPtr<char> buf (g_ucs4_to_utf8(reinterpret_cast<const gunichar*>(wstr.data()),
-                                            wstr.size(), 0, &n_bytes, &error));
-#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
-  // Avoid going through iconv if wchar_t always contains UTF-16.
-  glong n_bytes = 0;
-  const ScopedPtr<char> buf (g_utf16_to_utf8(reinterpret_cast<const gunichar2*>(wstr.data()),
-                                             wstr.size(), 0, &n_bytes, &error));
-#else
-  gsize n_bytes = 0;
-  const ScopedPtr<char> buf (g_convert(reinterpret_cast<const char*>(wstr.data()),
-                                       wstr.size() * sizeof(std::wstring::value_type),
-                                       "UTF-8", "WCHAR_T", 0, &n_bytes, &error));
-#endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
-
-  if (error)
-  {
-    Glib::Error::throw_exception(error);
-  }
-
-  utf8_string.assign(buf.get(), buf.get() + n_bytes);
-
-  return is;
-}
-
-std::wostream& operator<<(std::wostream& os, const ustring& utf8_string)
-{
-  GError* error = 0;
-
-#if defined(__STDC_ISO_10646__) && SIZEOF_WCHAR_T == 4
-  // Avoid going through iconv if wchar_t always contains UCS-4.
-  const ScopedPtr<gunichar> buf (g_utf8_to_ucs4(utf8_string.raw().data(),
-                                                utf8_string.raw().size(), 0, 0, &error));
-#elif defined(G_OS_WIN32) && SIZEOF_WCHAR_T == 2
-  // Avoid going through iconv if wchar_t always contains UTF-16.
-  const ScopedPtr<gunichar2> buf (g_utf8_to_utf16(utf8_string.raw().data(),
-                                                  utf8_string.raw().size(), 0, 0, &error));
-#else
-  // TODO: For some reason the conversion from UTF-8 to WCHAR_T doesn't work
-  // with g_convert(), while iconv on the command line handles it just fine.
-  // Maybe a bug in GLib?
-  const ScopedPtr<char> buf (g_convert(utf8_string.raw().data(), utf8_string.raw().size(),
-                                       "WCHAR_T", "UTF-8", 0, 0, &error));
-#endif // !(__STDC_ISO_10646__ || G_OS_WIN32)
-
-  if (error)
-  {
-    Glib::Error::throw_exception(error);
-  }
-
-  // This won't work if the string contains NUL characters.  Unfortunately,
-  // std::wostream::write() ignores format flags, so we cannot use that.
-  // The only option would be to create a temporary std::wstring.  However,
-  // even then GCC's libstdc++-v3 prints only the characters up to the first
-  // NUL.  Given this, there doesn't seem much of a point in allowing NUL in
-  // formatted output.  The semantics would be unclear anyway: what's the
-  // screen width of a NUL?
-  os << reinterpret_cast<wchar_t*>(buf.get());
-
-  return os;
-}
-
-#endif /* GLIBMM_HAVE_WIDE_STREAM */
-
-} // namespace Glib

Deleted: trunk/src/libustring/ustring.h
===================================================================
--- trunk/src/libustring/ustring.h	2012-03-20 13:49:06 UTC (rev 203)
+++ trunk/src/libustring/ustring.h	2012-03-20 14:05:34 UTC (rev 204)
@@ -1,1615 +0,0 @@
-// -*- c++ -*-
-#ifndef _GLIBMM_USTRING_H
-#define _GLIBMM_USTRING_H
-
-/* $Id$ */
-
-/* Copyright (C) 2002 The gtkmm Development Team
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <glibmmconfig.h>
-#include <glibmm/unicode.h>
-#include <glib.h>
-
-#include <iosfwd>
-#include <iterator>
-#include <sstream>
-#include <string>
-#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS
-#include <cstddef> /* for ptrdiff_t */
-#endif
-
-namespace Glib
-{
-
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-#ifndef GLIBMM_HAVE_STD_ITERATOR_TRAITS
-
-template <class T>
-struct IteratorTraits
-{
-  typedef typename T::iterator_category iterator_category;
-  typedef typename T::value_type        value_type;
-  typedef typename T::difference_type   difference_type;
-  typedef typename T::pointer           pointer;
-  typedef typename T::reference         reference;
-};
-
-template <class T>
-struct IteratorTraits<T*>
-{
-  typedef std::random_access_iterator_tag iterator_category;
-  typedef T                               value_type;
-  typedef ptrdiff_t                       difference_type;
-  typedef T*                              pointer;
-  typedef T&                              reference;
-};
-
-template <class T>
-struct IteratorTraits<const T*>
-{
-  typedef std::random_access_iterator_tag iterator_category;
-  typedef T                               value_type;
-  typedef ptrdiff_t                       difference_type;
-  typedef const T*                        pointer;
-  typedef const T&                        reference;
-};
-
-#endif /* GLIBMM_HAVE_STD_ITERATOR_TRAITS */
-#endif /* DOXYGEN_SHOULD_SKIP_THIS */
-
-
-/** The iterator type of Glib::ustring.
- * Note this is not a random access iterator but a bidirectional one,
- * since all index operations need to iterate over the UTF-8 data.  Use
- * std::advance() to move to a certain position.  However, all of the
- * relational operators are available:
- * <tt>==&nbsp;!=&nbsp;<&nbsp;>&nbsp;<=&nbsp;>=</tt>
- *
- * A writeable iterator isn't provided because:  The number of bytes of
- * the old UTF-8 character and the new one to write could be different.
- * Therefore, any write operation would invalidate all other iterators
- * pointing into the same string.
- */
-template <class T>
-class ustring_Iterator
-{
-public:
-  typedef std::bidirectional_iterator_tag   iterator_category;
-  typedef gunichar                          value_type;
-  typedef std::string::difference_type      difference_type;
-  typedef value_type                        reference;
-  typedef void                              pointer;
-
-  inline ustring_Iterator();
-  inline ustring_Iterator(const ustring_Iterator<std::string::iterator>& other);
-
-  inline value_type operator*() const;
-
-  inline ustring_Iterator<T> &     operator++();
-  inline const ustring_Iterator<T> operator++(int);
-  inline ustring_Iterator<T> &     operator--();
-  inline const ustring_Iterator<T> operator--(int);
-
-  explicit inline ustring_Iterator(T pos);
-  inline T base() const;
-
-private:
-  T pos_;
-};
-
-
-/** Extract a UCS-4 character from UTF-8 data.
- * Convert a single UTF-8 (multibyte) character starting at @p pos to
- * a UCS-4 wide character.  This may read up to 6 bytes after the start
- * position, depending on the UTF-8 character width.  You have to make
- * sure the source contains at least one valid UTF-8 character.
- *
- * This is mainly used by the implementation of Glib::ustring::iterator,
- * but it might be useful as utility function if you prefer using
- * std::string even for UTF-8 encoding.
- */
-gunichar get_unichar_from_std_iterator(std::string::const_iterator pos) G_GNUC_PURE;
-
-
-/** Glib::ustring has much the same interface as std::string, but contains
- * %Unicode characters encoded as UTF-8.
- *
- * @par About UTF-8 and ASCII
- * @par
- * The standard character set ANSI_X3.4-1968&nbsp;-- more commonly known as
- * ASCII&nbsp;-- is a subset of UTF-8.  So, if you want to, you can use
- * Glib::ustring without even thinking about UTF-8.
- * @par
- * Whenever ASCII is mentioned in this manual, we mean the @em real ASCII
- * (i.e. as defined in ANSI_X3.4-1968), which contains only 7-bit characters.
- * Glib::ustring can @em not be used with ASCII-compatible extended 8-bit
- * charsets like ISO-8859-1.  It's a good idea to avoid string literals
- * containing non-ASCII characters (e.g. German umlauts) in source code,
- * or at least you should use UTF-8 literals.
- * @par
- * You can find a detailed UTF-8 and %Unicode FAQ here:
- * http://www.cl.cam.ac.uk/~mgk25/unicode.html
- *
- * @par Glib::ustring vs. std::string
- * @par
- * Glib::ustring has implicit type conversions to and from std::string.
- * These conversions do @em not convert to/from the current locale (see
- * Glib::locale_from_utf8() and Glib::locale_to_utf8() if you need that).  You
- * can always use std::string instead of Glib::ustring&nbsp;-- however, using
- * std::string with multi-byte characters is quite hard.  For instance,
- * <tt>std::string::operator[]</tt> might return a byte in the middle of a
- * character, and <tt>std::string::length()</tt> returns the number of bytes
- * rather than characters.  So don't do that without a good reason.
- * @par
- * In a perfect world the C++ Standard Library would contain a UTF-8 string
- * class.  Unfortunately, the C++ standard doesn't mention UTF-8 at all.  Note
- * that std::wstring is not a UTF-8 string class because it contains only
- * fixed-width characters (where width could be 32, 16, or even 8 bits).
- *
- * @par Glib::ustring and stream input/output
- * @par
- * The stream I/O operators, that is operator<<() and operator>>(), perform
- * implicit charset conversion to/from the current locale.  If that's not
- * what you intented (e.g. when writing to a configuration file that should
- * always be UTF-8 encoded) use ustring::raw() to override this behaviour.
- * @par
- * If you're using std::ostringstream to build strings for display in the
- * user interface, you must convert the result back to UTF-8 as shown below:
- * @code
- * std::ostringstream output;
- * output.imbue(std::locale("")); // use the user's locale for this stream
- * output << percentage << " % done";
- * label->set_text(Glib::locale_to_utf8(output.str()));
- * @endcode
- *
- * @par Formatted output and internationalization
- * @par
- * The methods ustring::compose() and ustring::format() provide a convenient
- * and powerful alternative to string streams, as shown in the example below.
- * Refer to the method documentation of compose() and format() for details.
- * @code
- * using Glib::ustring;
- *
- * ustring message = ustring::compose("%1 is lower than 0x%2.",
- *                                    12, ustring::format(std::hex, 16));
- * @endcode
- *
- * @par Implementation notes
- * @par
- * Glib::ustring does not inherit from std::string, because std::string was
- * intended to be a final class.  For instance, it does not have a virtual
- * destructor.  Also, a HAS-A relationship is more appropriate because
- * ustring can't just enhance the std::string interface.  Rather, it has to
- * reimplement the interface so that all operations are based on characters
- * instead of bytes.
- */
-class ustring
-{
-public:
-  typedef std::string::size_type                        size_type;
-  typedef std::string::difference_type                  difference_type;
-
-  typedef gunichar                                      value_type;
-  typedef gunichar &                                    reference;
-  typedef const gunichar &                              const_reference;
-
-  typedef ustring_Iterator<std::string::iterator>       iterator;
-  typedef ustring_Iterator<std::string::const_iterator> const_iterator;
-
-#ifndef GLIBMM_HAVE_SUN_REVERSE_ITERATOR
-
-  typedef std::reverse_iterator<iterator>               reverse_iterator;
-  typedef std::reverse_iterator<const_iterator>         const_reverse_iterator;
-
-#else
-
-  typedef std::reverse_iterator<iterator,
-                                iterator::iterator_category,
-                                iterator::value_type,
-                                iterator::reference,
-                                iterator::pointer,
-                                iterator::difference_type> reverse_iterator;
-  typedef std::reverse_iterator<const_iterator,
-                                const_iterator::iterator_category,
-                                const_iterator::value_type,
-                                const_iterator::reference,
-                                const_iterator::pointer,
-                                const_iterator::difference_type> const_reverse_iterator;
-
-#endif /* GLIBMM_HAVE_SUN_REVERSE_ITERATOR */
-
-#ifdef GLIBMM_HAVE_ALLOWS_STATIC_INLINE_NPOS
-  static GLIBMM_API const size_type npos = std::string::npos;
-#else
-  //The IRIX MipsPro compiler says "The indicated constant value is not known",
-  //so we need to initalize the static member data elsewhere.
-  static GLIBMM_API const size_type npos;
-#endif
-
-  /*! Default constructor, which creates an empty string.
-   */
-  ustring();
-
-  ~ustring();
-
-  /*! Construct a ustring as a copy of another ustring.
-   * @param other A source string.
-   */
-  ustring(const ustring& other);
-
-  /*! Assign the value of another string to this string.
-   * @param other A source string.
-   */
-  ustring& operator=(const ustring& other);
-
-  /*! Swap contents with another string.
-   * @param other String to swap with.
-   */
-  void swap(ustring& other);
-
-  /*! Construct a ustring as a copy of another std::string.
-   * @param src A source <tt>std::string</tt> containing text encoded as UTF-8.
-   */
-  ustring(const std::string& src);
-
-  /*! Construct a ustring as a copy of a substring.
-   * @param src %Source ustring.
-   * @param i Index of first character to copy from.
-   * @param n Number of UTF-8 characters to copy (defaults to copying the remainder).
-   */
-  ustring(const ustring& src, size_type i, size_type n=npos);
-
-  /*! Construct a ustring as a partial copy of a C string.
-   * @param src %Source C string encoded as UTF-8.
-   * @param n Number of UTF-8 characters to copy.
-   */
-  ustring(const char* src, size_type n);
-
-  /*! Construct a ustring as a copy of a C string.
-   * @param src %Source C string encoded as UTF-8.
-   */
-  ustring(const char* src);
-
-  /*! Construct a ustring as multiple characters.
-   * @param n Number of characters.
-   * @param uc UCS-4 code point to use.
-   */
-  ustring(size_type n, gunichar uc);
-
-  /*! Construct a ustring as multiple characters.
-   * @param n Number of characters.
-   * @param c ASCII character to use.
-   */
-  ustring(size_type n, char c);
-
-  /*! Construct a ustring as a copy of a range.
-   * @param pbegin Start of range.
-   * @param pend End of range.
-   */
-  template <class In> ustring(In pbegin, In pend);
-
-
-//! @name Assign new contents.
-//! @{
-
-  ustring& operator=(const std::string& src);
-  ustring& operator=(const char* src);
-  ustring& operator=(gunichar uc);
-  ustring& operator=(char c);
-
-  ustring& assign(const ustring& src);
-  ustring& assign(const ustring& src, size_type i, size_type n);
-  ustring& assign(const char* src, size_type n);
-  ustring& assign(const char* src);
-  ustring& assign(size_type n, gunichar uc);
-  ustring& assign(size_type n, char c);
-  template <class In> ustring& assign(In pbegin, In pend);
-
-//! @}
-//! @name Append to the string.
-//! @{
-
-  ustring& operator+=(const ustring& src);
-  ustring& operator+=(const char* src);
-  ustring& operator+=(gunichar uc);
-  ustring& operator+=(char c);
-  void push_back(gunichar uc);
-  void push_back(char c);
-
-  ustring& append(const ustring& src);
-  ustring& append(const ustring& src, size_type i, size_type n);
-  ustring& append(const char* src, size_type n);
-  ustring& append(const char* src);
-  ustring& append(size_type n, gunichar uc);
-  ustring& append(size_type n, char c);
-  template <class In> ustring& append(In pbegin, In pend);
-
-//! @}
-//! @name Insert into the string.
-//! @{
-
-  ustring& insert(size_type i, const ustring& src);
-  ustring& insert(size_type i, const ustring& src, size_type i2, size_type n);
-  ustring& insert(size_type i, const char* src, size_type n);
-  ustring& insert(size_type i, const char* src);
-  ustring& insert(size_type i, size_type n, gunichar uc);
-  ustring& insert(size_type i, size_type n, char c);
-
-  iterator insert(iterator p, gunichar uc);
-  iterator insert(iterator p, char c);
-  void     insert(iterator p, size_type n, gunichar uc);
-  void     insert(iterator p, size_type n, char c);
-  template <class In> void insert(iterator p, In pbegin, In pend);
-
-//! @}
-//! @name Replace sub-strings.
-//! @{
-
-  ustring& replace(size_type i, size_type n, const ustring& src);
-  ustring& replace(size_type i, size_type n, const ustring& src, size_type i2, size_type n2);
-  ustring& replace(size_type i, size_type n, const char* src, size_type n2);
-  ustring& replace(size_type i, size_type n, const char* src);
-  ustring& replace(size_type i, size_type n, size_type n2, gunichar uc);
-  ustring& replace(size_type i, size_type n, size_type n2, char c);
-
-  ustring& replace(iterator pbegin, iterator pend, const ustring& src);
-  ustring& replace(iterator pbegin, iterator pend, const char* src, size_type n);
-  ustring& replace(iterator pbegin, iterator pend, const char* src);
-  ustring& replace(iterator pbegin, iterator pend, size_type n, gunichar uc);
-  ustring& replace(iterator pbegin, iterator pend, size_type n, char c);
-  template <class In> ustring& replace(iterator pbegin, iterator pend, In pbegin2, In pend2);
-
-//! @}
-//! @name Erase sub-strings.
-//! @{
-
-  void clear();
-  ustring& erase(size_type i, size_type n=npos);
-  ustring& erase();
-  iterator erase(iterator p);
-  iterator erase(iterator pbegin, iterator pend);
-
-//! @}
-//! @name Compare and collate.
-//! @{
-
-  int compare(const ustring& rhs) const;
-  int compare(const char* rhs)    const;
-  int compare(size_type i, size_type n, const ustring& rhs) const;
-  int compare(size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const;
-  int compare(size_type i, size_type n, const char* rhs, size_type n2) const;
-  int compare(size_type i, size_type n, const char* rhs) const;
-
-  /*! Create a unique sorting key for the UTF-8 string.  If you need to
-   * compare UTF-8 strings regularly, e.g. for sorted containers such as
-   * <tt>std::set<></tt>, you should consider creating a collate key first
-   * and compare this key instead of the actual string.
-   *
-   * The ustring::compare() methods as well as the relational operators
-   * <tt>==&nbsp;!=&nbsp;<&nbsp;>&nbsp;<=&nbsp;>=</tt> are quite costly
-   * because they have to deal with %Unicode and the collation rules defined by
-   * the current locale.  Converting both operands to UCS-4 is just the first
-   * of several costly steps involved when comparing ustrings.  So be careful.
-   */
-  std::string collate_key() const;
-
-  /*! Create a unique key for the UTF-8 string that can be used for caseless
-   * sorting.  <tt>ustr.casefold_collate_key()</tt> results in the same string
-   * as <tt>ustr.casefold().collate_key()</tt>, but the former is likely more
-   * efficient.
-   */
-  std::string casefold_collate_key() const;
-
-//! @}
-//! @name Extract characters and sub-strings.
-//! @{
-
-  /*! No reference return; use replace() to write characters. */
-  value_type operator[](size_type i) const;
-
-  /*! No reference return; use replace() to write characters. @throw std::out_of_range */
-  value_type at(size_type i) const;
-
-  inline ustring substr(size_type i=0, size_type n=npos) const;
-
-//! @}
-//! @name Access a sequence of characters.
-//! @{
-
-  iterator begin();
-  iterator end();
-  const_iterator begin() const;
-  const_iterator end()   const;
-  reverse_iterator rbegin();
-  reverse_iterator rend();
-  const_reverse_iterator rbegin() const;
-  const_reverse_iterator rend()   const;
-
-//! @}
-//! @name Find sub-strings.
-//! @{
-
-  size_type find(const ustring& str, size_type i=0) const;
-  size_type find(const char* str, size_type i, size_type n) const;
-  size_type find(const char* str, size_type i=0) const;
-  size_type find(gunichar uc, size_type i=0) const;
-  size_type find(char c, size_type i=0) const;
-
-  size_type rfind(const ustring& str, size_type i=npos) const;
-  size_type rfind(const char* str, size_type i, size_type n) const;
-  size_type rfind(const char* str, size_type i=npos) const;
-  size_type rfind(gunichar uc, size_type i=npos) const;
-  size_type rfind(char c, size_type i=npos) const;
-
-//! @}
-//! @name Match against a set of characters.
-//! @{
-
-  size_type find_first_of(const ustring& match, size_type i=0) const;
-  size_type find_first_of(const char* match, size_type i, size_type n) const;
-  size_type find_first_of(const char* match, size_type i=0) const;
-  size_type find_first_of(gunichar uc, size_type i=0) const;
-  size_type find_first_of(char c, size_type i=0) const;
-
-  size_type find_last_of(const ustring& match, size_type i=npos) const;
-  size_type find_last_of(const char* match, size_type i, size_type n) const;
-  size_type find_last_of(const char* match, size_type i=npos) const;
-  size_type find_last_of(gunichar uc, size_type i=npos) const;
-  size_type find_last_of(char c, size_type i=npos) const;
-
-  size_type find_first_not_of(const ustring& match, size_type i=0) const;
-  size_type find_first_not_of(const char* match, size_type i, size_type n) const;
-  size_type find_first_not_of(const char* match, size_type i=0) const;
-  size_type find_first_not_of(gunichar uc, size_type i=0) const;
-  size_type find_first_not_of(char c, size_type i=0) const;
-
-  size_type find_last_not_of(const ustring& match, size_type i=npos) const;
-  size_type find_last_not_of(const char* match, size_type i, size_type n) const;
-  size_type find_last_not_of(const char* match, size_type i=npos) const;
-  size_type find_last_not_of(gunichar uc, size_type i=npos) const;
-  size_type find_last_not_of(char c, size_type i=npos) const;
-
-//! @}
-//! @name Retrieve the string's size.
-//! @{
-
-  /** Returns true if the string is empty. Equivalent to *this == "".
-   * @result Whether the string is empty.
-   */
-  bool empty()  const;
-
-  /** Returns the number of characters in the string, not including any null-termination.
-   * @result T...
 
[truncated message content]

[xmlwrapp-commits] SF.net SVN: xmlwrapp:[204] trunk/src/libustring

A modern style C++ library for working with XML data

[xmlwrapp-commits] SF.net SVN: xmlwrapp:[204] trunk/src/libustring