|
From: Foster B. <fos...@us...> - 2005-04-07 00:53:36
|
Update of /cvsroot/adobe-source/sandbox/adobe-source/adobe/source In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv25640/adobe/source Added Files: xstr.cpp Log Message: xstr initial commmit. --- NEW FILE: xstr.cpp --- /* Copyright 2005 Adobe Systems Incorporated Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt or a copy at http://opensource.adobe.com/licenses.html) */ /*************************************************************************************************/ #include <adobe/xstr.hpp> #include <adobe/config.hpp> #include <adobe/name.hpp> #include <adobe/dictionary.hpp> #include <adobe/once.hpp> #include <adobe/algorithm.hpp> #include <adobe/functional.hpp> #include <vector> #include <map> #include <boost/bind.hpp> #ifdef __MWERKS__ // Specific to Metrowerks, not to Macintosh platform. #pragma warn_unusedarg off #endif //#define BOOST_SPIRIT_DEBUG 1 #include <boost/spirit/core.hpp> #ifdef BOOST_SPIRIT_DEBUG #include <boost/spirit/debug.hpp> #endif #ifdef __MWERKS__ // Specific to Metrowerks, not to Macintosh platform. #pragma warn_unusedarg reset #endif #if !defined(NDEBUG) && defined(ADOBE_SERIALIZATION) #define DOING_SERIALIZATION 0 #endif #if DOING_SERIALIZATION #include <iostream> #include <sstream> #endif /*************************************************************************************************/ ADOBE_GLOBAL_MUTEX_DEFINITION(xstr_store) ADOBE_GLOBAL_MUTEX_DEFINITION(xstr_context) ADOBE_ONCE_DECLARATION(xstr_once) ADOBE_ONCE_STATIC_INSTANCE(xstr_once) /*************************************************************************************************/ namespace adobe { /*************************************************************************************************/ /* Just counts the number of outputs; doesn't copy anything. More efficient than a back_insert_iterator into a vector you'll never use if all you're interested in is the size of the resultant vector. */ class back_insert_counter : public std::iterator<std::output_iterator_tag, void, void, void, void> { public: back_insert_counter() : count_m(0) { } template <typename T> back_insert_counter& operator=(const T&) { return *this; } back_insert_counter& operator*() { return *this; } back_insert_counter& operator++() { ++count_m; return *this; } back_insert_counter& operator++(int) { ++count_m; return *this; } std::size_t count() const { return count_m; } private: std::size_t count_m; }; /*************************************************************************************************/ template <typename ForwardIterator1, typename ForwardIterator2> bool is_subset( const ForwardIterator1& first1, const ForwardIterator1& last1, const ForwardIterator2& first2, const ForwardIterator2& last2) { /* subset is a proper subset of superset if size(intersect(subset, superset)) == size(subset) */ std::size_t matches(adobe::set_intersection(first1, last1, first2, last2, back_insert_counter()).count()); std::size_t subset_size(std::distance(first2, last2)); return matches == subset_size; } /*************************************************************************************************/ template <typename ForwardIterator1, typename ForwardIterator2> bool is_subset( ForwardIterator1& first1, ForwardIterator1& last1, ForwardIterator2& first2, ForwardIterator2& last2) { return is_subset( const_cast<const ForwardIterator1>(first1), const_cast<const ForwardIterator1>(last1), const_cast<const ForwardIterator2>(first2), const_cast<const ForwardIterator2>(last2)); } /*************************************************************************************************/ template <typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate> bool is_subset( const ForwardIterator1& first1, const ForwardIterator1& last1, const ForwardIterator2& first2, const ForwardIterator2& last2, BinaryPredicate comp) { /* subset is a proper subset of superset if size(intersect(subset, superset)) == size(subset) */ std::size_t matches(adobe::set_intersection(first1, last1, first2, last2, back_insert_counter(), comp).count()); std::size_t subset_size(std::distance(first2, last2)); return matches == subset_size; } /*************************************************************************************************/ template <typename ForwardIterator1, typename ForwardIterator2, typename BinaryPredicate> bool is_subset( ForwardIterator1& first1, ForwardIterator1& last1, ForwardIterator2& first2, ForwardIterator2& last2, BinaryPredicate comp) { return is_subset( const_cast<const ForwardIterator1>(first1), const_cast<const ForwardIterator1>(last1), const_cast<const ForwardIterator2>(first2), const_cast<const ForwardIterator2>(last2), comp); } /*************************************************************************************************/ template <typename ForwardRange1, typename ForwardRange2> inline bool is_subset(const ForwardRange1& superset, const ForwardRange2& subset) { return is_subset( boost::begin(superset), boost::end(superset), boost::begin(subset), boost::end(subset)); } /*************************************************************************************************/ template <typename ForwardRange1, typename ForwardRange2> inline bool is_subset(ForwardRange1& superset, ForwardRange2& subset) { return is_subset( const_cast<const ForwardRange1>(superset), const_cast<const ForwardRange2>(subset)); } /*************************************************************************************************/ template <typename ForwardRange1, typename ForwardRange2, typename BinaryPredicate> inline bool is_subset(const ForwardRange1& superset, const ForwardRange2& subset, BinaryPredicate comp) { return is_subset( boost::begin(superset), boost::end(superset), boost::begin(subset), boost::end(subset), comp); } /*************************************************************************************************/ template <typename ForwardRange1, typename ForwardRange2, typename BinaryPredicate> inline bool is_subset(ForwardRange1& superset, ForwardRange2& subset, BinaryPredicate comp) { return is_subset( const_cast<const ForwardRange1>(superset), const_cast<const ForwardRange2>(subset), comp); } /*************************************************************************************************/ } // namespace adobe /*************************************************************************************************/ namespace { /*************************************************************************************************/ struct str_less_t { bool operator () (const adobe::name_t& x, const adobe::name_t& y) const { return x.get() < y.get(); } }; /*************************************************************************************************/ struct node_t { typedef std::pair<adobe::name_t, adobe::name_t> attribute_t; typedef std::vector<attribute_t> attribute_set_t; void attribute_value(const adobe::name_t attribute, adobe::name_t& value) const; attribute_set_t attribute_set_m; adobe::name_t value_m; }; /*************************************************************************************************/ #if DOING_SERIALIZATION std::ostream& operator << (std::ostream& s, const node_t& node) { node_t::attribute_set_t::const_iterator first(node.attribute_set_m.begin()); node_t::attribute_set_t::const_iterator last(node.attribute_set_m.end()); s << "<xstr"; for (; first != last; ++first) s << " " << first->first.get() << "=\"" << first->second << "\""; s << ">"; s << node.value_m; s << "</xstr>"; return s; } #endif /*************************************************************************************************/ typedef std::multimap<adobe::name_t, node_t, str_less_t> store_t; typedef store_t::iterator store_iterator; typedef store_t::value_type store_value_type; typedef std::pair<store_iterator, store_iterator> store_range_pair_t; /*************************************************************************************************/ static const adobe::once_name_t attribute_id; static const adobe::once_name_t attribute_lang; static const adobe::once_name_t attribute_platform; static const adobe::once_name_t attribute_context; static store_t* xstr_store_g = 0; static node_t::attribute_set_t* default_context_g = 0; /*************************************************************************************************/ struct attribute_less_t { typedef bool result_type; result_type operator () ( const node_t::attribute_set_t::value_type& x, const node_t::attribute_set_t::value_type& y) const { return (x.first.get() < y.first.get()) || (x.first == y.first && x.second.get() < y.second.get()); } }; /*************************************************************************************************/ struct attribute_key_less_t { typedef bool result_type; result_type operator () ( const node_t::attribute_set_t::value_type& x, const node_t::attribute_set_t::value_type& y) const { return x.first.get() < y.first.get(); } }; /*************************************************************************************************/ void init_xstr_once() { static store_t xstr_store_s; static node_t::attribute_set_t default_context_s; adobe::remove_const(attribute_id) = adobe::name_t("id"); adobe::remove_const(attribute_lang) = adobe::name_t("lang"); adobe::remove_const(attribute_platform) = adobe::name_t("platform"); adobe::remove_const(attribute_context) = adobe::name_t("context"); #if ADOBE_PLATFORM_MAC default_context_s.push_back(std::make_pair(attribute_platform, adobe::static_name_t("macintosh"))); #elif ADOBE_PLATFORM_WIN default_context_s.push_back(std::make_pair(attribute_platform, adobe::static_name_t("windows"))); #else default_context_s.push_back(std::make_pair(attribute_platform, adobe::static_name_t("unknown"))); #endif default_context_s.push_back(std::make_pair(attribute_lang, adobe::static_name_t("en-us"))); adobe::sort(default_context_s, attribute_less_t()); xstr_store_g = &xstr_store_s; default_context_g = &default_context_s; } /*************************************************************************************************/ void node_t::attribute_value(const adobe::name_t attribute, adobe::name_t& value) const { attribute_set_t::const_iterator result = adobe::lower_bound( attribute_set_m, std::make_pair(value, adobe::name_t()), adobe::compare_members(&attribute_set_t::value_type::first)); if (result != attribute_set_m.end() && result->first == attribute) value = result->second; } /*************************************************************************************************/ long node_attribute_likeness(const node_t& original, const node_t& test) { bool is_proper_subset(is_subset(original.attribute_set_m, test.attribute_set_m, attribute_less_t())); long result(is_proper_subset ? test.attribute_set_m.size() : 0); #if 0 std::cerr << " Likeness test:\n" << " orig: " << original << "\n" << " test: " << test << "\n" << " rslt: " << result << std::endl; #endif return result; } /*************************************************************************************************/ bool operator == (const node_t& x, const node_t& y) { if (x.attribute_set_m.size() != y.attribute_set_m.size()) return false; // don't compare the value_m of the node, because it is not essential data return node_attribute_likeness(x, y) == x.attribute_set_m.size(); } /*************************************************************************************************/ store_iterator xstr_exact_match(store_range_pair_t range, const store_t::mapped_type& searching) { store_iterator first(range.first); store_iterator last(range.second); for (; first != last; ++first) if (first->second == searching) break; return first; } /*************************************************************************************************/ store_iterator xstr_closest_match(store_range_pair_t range, const store_t::mapped_type& searching) { typedef std::pair<long, store_iterator> likeness_t; typedef std::vector<likeness_t> likeness_set_t; store_iterator first(range.first); store_iterator last(range.second); long range_size(std::distance(first, last)); if (range_size < 2) return first; likeness_set_t likeness_set; for (; first != last; ++first) likeness_set.push_back(std::make_pair(node_attribute_likeness(first->second, searching), first)); adobe::sort(likeness_set, adobe::compare_members(&likeness_t::first)); if (likeness_set.size() > 1) { likeness_set_t::iterator highest(--likeness_set.end()); likeness_set_t::iterator next_highest(highest - 1); if (highest->first == next_highest->first) { #if DOING_SERIALIZATION std::stringstream errstr; errstr << "xstr: ambiguous closest match. Looking for:\n " << searching << "\nfound at least:\n " << highest->second->second << "\nand\n " << next_highest->second->second; throw std::runtime_error(errstr.str()); #else throw std::runtime_error("xstr: ambiguous closest match"); #endif } } return (--likeness_set.end())->second; } /*************************************************************************************************/ store_range_pair_t range_for_key(store_t& store, const store_t::key_type& key) { store_range_pair_t range(store.equal_range(key)); if (range.first != range.second && range.first->first != key) range.first = range.second; return range; } /*************************************************************************************************/ node_t* xstr_store(const store_t::key_type& key, const store_t::mapped_type& mapped) { if (key == store_t::key_type()) throw std::runtime_error("xstr: id missing"); // NOTE (fbrereto) : This has been commented out because copy_to_xstr_store is the only // function that accesses this function. We are interested in grabbing // the mutex only once to make this the fastest group add possible, so // we grab the mutex in copy_to_xstr_store instead of here. If you decide // to change this function's relationship to the rest of this code, make // sure you know what you are doing, lest you introduce a threading issue. // ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_store); store_iterator result(xstr_store_g->end()); store_range_pair_t range(range_for_key(*xstr_store_g, key)); bool add(range.first == range.second); if (!add) { store_iterator match(xstr_exact_match(range, mapped)); if (match != range.second) result = match; else add = true; } if (add) result = xstr_store_g->insert(std::make_pair(key, mapped)); assert (result != xstr_store_g->end()); return &result->second; } /*************************************************************************************************/ void assign_name(adobe::name_t& name, const char* first, const char* last) { name = adobe::name_t(first, last - first); } /*************************************************************************************************/ typedef std::vector<node_t> node_set_t; /*************************************************************************************************/ void merge_attributes(const node_t::attribute_set_t& src1, const node_t::attribute_set_t& src2, node_t::attribute_set_t& dst) { // This merges two attribute sets together and places the union into dst. // In the case where both attribute sets have an element with the same name, // the value in the union will be from src1. The values are not compared when // performing the union. This means the "override" attribute set should be the // first source when you call this function. node_t::attribute_set_t merged; adobe::set_union(src1, src2, std::back_inserter(merged), attribute_key_less_t()); dst = merged; } /*************************************************************************************************/ void push_and_clear_node(node_set_t& node_set, node_t& new_node) { adobe::sort(new_node.attribute_set_m, attribute_less_t()); { ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_context); merge_attributes(new_node.attribute_set_m, *default_context_g, new_node.attribute_set_m); } node_set.push_back(new_node); new_node = node_t(); } /*************************************************************************************************/ struct xstr_parser { explicit xstr_parser(node_set_t& node_set); void do_parse(const adobe::xstr_t::parse_range_t& parse_range); private: node_t::attribute_t cur_attribute; node_t cur_node; // NOTE (fbrereto) : Naming conventions for these rules should follow XML 1.1 spec: // http://www.w3.org/TR/2004/REC-xml11-20040204/ // Note that we are interested in following the XML spec as closely // as possible, but because we are intentionally using a subset of // XML the grammar is clipped in places. boost::spirit::rule<> s; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-S boost::spirit::rule<> document; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-document boost::spirit::rule<> element; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-element boost::spirit::rule<> empty_elem_tag; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-EmptyElemTag boost::spirit::rule<> s_tag; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-STag boost::spirit::rule<> e_tag; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-ETag boost::spirit::rule<> name_start_char; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar boost::spirit::rule<> name_char; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameChar boost::spirit::rule<> name; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-Name boost::spirit::rule<> eq; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-Eq boost::spirit::rule<> attribute; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-Attribute boost::spirit::rule<> content; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-content boost::spirit::rule<> att_value; // http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-AttValue }; /*************************************************************************************************/ xstr_parser::xstr_parser(node_set_t& node_set) { s = +(boost::spirit::space_p); document = !s >> *(element[boost::bind(&push_and_clear_node, boost::ref(node_set), boost::ref(cur_node))] >> !s); element = (s_tag >> content[boost::bind(assign_name, boost::ref(cur_node.value_m), _1, _2)] >> e_tag) | empty_elem_tag; empty_elem_tag = "<xstr" >> *(s >> attribute) >> !s >> "/>"; s_tag = "<xstr" >> *(s >> attribute[boost::spirit::push_back_a(cur_node.attribute_set_m, cur_attribute)]) >> !s >> '>'; e_tag = "</xstr" >> !s >> '>'; name_start_char = boost::spirit::alpha_p | '_' | ':'; name_char = name_start_char | boost::spirit::digit_p | '-' | '.'; name = name_start_char >> *(name_char); eq = !s >> '=' >> !s; attribute = name[boost::bind(assign_name, boost::ref(cur_attribute.first), _1, _2)] >> eq >> att_value; content = *(boost::spirit::print_p - '<'); att_value = '\"' >> (*(boost::spirit::print_p - '<' - '&' - '\"'))[boost::bind(assign_name, boost::ref(cur_attribute.second), _1, _2)] >> '\"' | '\'' >> (*(boost::spirit::print_p - '<' - '&' - '\''))[boost::bind(assign_name, boost::ref(cur_attribute.second), _1, _2)] >> '\''; BOOST_SPIRIT_DEBUG_NODE(s); BOOST_SPIRIT_DEBUG_NODE(document); BOOST_SPIRIT_DEBUG_NODE(element); BOOST_SPIRIT_DEBUG_NODE(empty_elem_tag); BOOST_SPIRIT_DEBUG_NODE(s_tag); BOOST_SPIRIT_DEBUG_NODE(e_tag); BOOST_SPIRIT_DEBUG_NODE(name_start_char); BOOST_SPIRIT_DEBUG_NODE(name_char); BOOST_SPIRIT_DEBUG_NODE(name); BOOST_SPIRIT_DEBUG_NODE(eq); BOOST_SPIRIT_DEBUG_NODE(attribute); BOOST_SPIRIT_DEBUG_NODE(content); BOOST_SPIRIT_DEBUG_NODE(att_value); } /*************************************************************************************************/ void xstr_parser::do_parse(const adobe::xstr_t::parse_range_t& parse_range) { std::string definition(parse_range.first, parse_range.second); if (!boost::spirit::parse(definition.c_str(), document).full) throw std::runtime_error("xstr: parse string Ill-formed"); } /*************************************************************************************************/ void copy_to_xstr_store(const node_set_t::value_type& x) { adobe::name_t id; x.attribute_value(attribute_id, id); ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_store); xstr_store(id, x); } /*************************************************************************************************/ node_t::attribute_set_t dictionary_to_attribute_set(const adobe::dictionary_t& dict) { node_t::attribute_set_t result; adobe::dictionary_t::const_iterator first(dict.begin()); adobe::dictionary_t::const_iterator last(dict.end()); for (; first != last; ++first) { adobe::name_t name(first->first); adobe::name_t att_value(first->second.template get<std::string>().c_str()); result.push_back(std::make_pair(name, att_value)); } adobe::sort(result, attribute_less_t()); return result; } /*************************************************************************************************/ void fill_glossary(const adobe::xstr_t::parse_range_t& parse_range, bool append) { if (!append) { ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_store); xstr_store_g->clear(); } node_set_t node_set; xstr_parser(node_set).do_parse(parse_range); adobe::for_each(node_set, copy_to_xstr_store); } /*************************************************************************************************/ } // namespace /*************************************************************************************************/ namespace adobe { /*************************************************************************************************/ struct xstr_t::implementation_t { explicit implementation_t(const char* xstr) { initialize(xstr, xstr + std::strlen(xstr), adobe::dictionary_t()); } implementation_t(const char* xstr, std::size_t n) { initialize(xstr, xstr + n, adobe::dictionary_t()); } implementation_t(const char* xstr, std::size_t n, const adobe::dictionary_t& context) { initialize(xstr, xstr + n, context); } const char* get() const; private: implementation_t(); void initialize(const char* first, const char* last, const adobe::dictionary_t& context); adobe::name_t utf8_m; }; /*************************************************************************************************/ void xstr_t::implementation_t::initialize(const char* first, const char* last, const adobe::dictionary_t& context) { // parse out the xstr definition node_set_t node_set; xstr_parser(node_set).do_parse(std::make_pair(first, last)); if (node_set.size() > 1) throw std::runtime_error("xstr: Illegal initialization"); adobe::name_t key; node_t& node(*node_set.begin()); node.attribute_value(attribute_id, key); // impose the default context information { ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_context); merge_attributes(node.attribute_set_m, *default_context_g, node.attribute_set_m); } // impose runtime context information if applicable if (!context.empty()) merge_attributes(dictionary_to_attribute_set(context), node.attribute_set_m, node.attribute_set_m); // do the actual lookup of the xstr ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_store); store_iterator result(xstr_closest_match(range_for_key(*xstr_store_g, key), node)); utf8_m = (result != xstr_store_g->end()) ? result->second.value_m : node.value_m; } /*************************************************************************************************/ const char* xstr_t::implementation_t::get() const { return utf8_m.get(); } /*************************************************************************************************/ xstr_t::xstr_t(const char* xstr) : object_m(new implementation_t(xstr)) { } xstr_t::xstr_t(const char* xstr, std::size_t n) : object_m(new implementation_t(xstr, n)) { } xstr_t::xstr_t(const char* xstr, std::size_t n, const adobe::dictionary_t& context) : object_m(new implementation_t(xstr, n, context)) { } xstr_t::xstr_t(const xstr_t& rhs) : object_m(new implementation_t(*rhs.object_m)) { } xstr_t::~xstr_t() { delete object_m; } xstr_t& xstr_t::operator = (const xstr_t& rhs) { *object_m = *rhs.object_m; return *this; } const char* xstr_t::get() const { return object_m->get(); } void xstr_t::assign_glossary(const parse_range_t& parse_range) { fill_glossary(parse_range, false); } void xstr_t::append_glossary(const parse_range_t& parse_range) { fill_glossary(parse_range, true); } void xstr_t::set_default_context(const adobe::dictionary_t& context) { node_t::attribute_set_t new_context(dictionary_to_attribute_set(context)); adobe::sort(new_context, attribute_less_t()); ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_context); *default_context_g = new_context; } adobe::dictionary_t xstr_t::get_default_context() { adobe::dictionary_t result; adobe::dictionary_t::write_reference dict(result.write()); { ADOBE_GLOBAL_MUTEX_INSTANCE(xstr_context); node_t::attribute_set_t::const_iterator first(default_context_g->begin()); node_t::attribute_set_t::const_iterator last(default_context_g->end()); for (; first != last; ++first) dict[first->first] = adobe::value_t(first->second); } return result; } /*************************************************************************************************/ } // namespace adobe /*************************************************************************************************/ ADOBE_ONCE_DEFINITION(xstr_once, init_xstr_once) /*************************************************************************************************/ |