[Assorted-commits] SF.net SVN: assorted:[1288] cpp-commons/trunk/src/commons/snap_map.h
Brought to you by:
yangzhang
From: <yan...@us...> - 2009-03-12 19:54:06
|
Revision: 1288 http://assorted.svn.sourceforge.net/assorted/?rev=1288&view=rev Author: yangzhang Date: 2009-03-12 19:53:53 +0000 (Thu, 12 Mar 2009) Log Message: ----------- added snap_map Added Paths: ----------- cpp-commons/trunk/src/commons/snap_map.h Copied: cpp-commons/trunk/src/commons/snap_map.h (from rev 1284, cpp-commons/trunk/src/commons/fast_map.h) =================================================================== --- cpp-commons/trunk/src/commons/snap_map.h (rev 0) +++ cpp-commons/trunk/src/commons/snap_map.h 2009-03-12 19:53:53 UTC (rev 1288) @@ -0,0 +1,323 @@ +#ifndef COMMONS_DENSE_HASH_MAP_H +#define COMMONS_DENSE_HASH_MAP_H + +#include <boost/functional/hash.hpp> +#include <commons/array.h> +#include <commons/assert.h> +#include <commons/exceptions.h> +#include <utility> + +namespace commons { + + using namespace boost; + using namespace std; + + // TODO write unit tests + + template<typename Key, typename Data> class snap_map; + template<typename map_traits> class snap_map_iterator; + template<typename map_traits> class snap_map_const_iterator; + + template<typename Key, typename Data> + struct snap_map_traits + { + typedef snap_map<Key, Data> map_type; + typedef Key key_type; + typedef Data data_type; + typedef pair<key_type, data_type> value_type; + typedef snap_map_traits<Key, Data> traits; + + typedef value_type &reference; + typedef const value_type &const_reference; + typedef value_type *pointer; + typedef const value_type *const_pointer; + typedef snap_map_iterator<traits> iterator; + typedef snap_map_const_iterator<traits> const_iterator; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + }; + + template<typename map_traits> + class snap_map_const_iterator + { + public: + typedef typename map_traits::map_type map_type; + typedef typename map_traits::iterator iterator; + typedef typename map_traits::const_iterator const_iterator; + typedef typename map_traits::value_type value_type; + typedef typename map_traits::reference reference; + typedef typename map_traits::const_reference const_reference; + typedef typename map_traits::pointer pointer; + typedef typename map_traits::const_pointer const_pointer; + typedef typename map_traits::size_type size_type; + typedef typename map_traits::difference_type difference_type; + typedef forward_iterator_tag iterator_category; + private: + const map_type &m; + const value_type *p; + const value_type *end; + public: + snap_map_const_iterator(const map_type &m, const value_type *p) + : m(m), p(p), end(m.get_table().end()) { increment(); } + void increment() { for (; p != end && m.empty_or_deleted(p->first); ++p) {} } + const_iterator &operator++() { + ++p; + increment(); + return *this; + } + const_iterator operator++(int) { + snap_map_const_iterator copy = *this; + ++*this; + return copy; + } + const_reference operator*() const { return *p; } + const_pointer operator->() const { return p; } + bool operator==(const const_iterator &it) const { return p == it.p; } + bool operator!=(const const_iterator &it) const { return p != it.p; } + }; + + template<typename map_traits> + class snap_map_iterator + { + public: + typedef typename map_traits::map_type map_type; + typedef typename map_traits::iterator iterator; + typedef typename map_traits::const_iterator const_iterator; + typedef typename map_traits::value_type value_type; + typedef typename map_traits::reference reference; + typedef typename map_traits::const_reference const_reference; + typedef typename map_traits::pointer pointer; + typedef typename map_traits::const_pointer const_pointer; + typedef typename map_traits::size_type size_type; + typedef typename map_traits::difference_type difference_type; + typedef forward_iterator_tag iterator_category; + private: + map_type &m; + value_type *p; + value_type *end; + public: + snap_map_iterator(map_type &m, value_type *p) + : m(m), p(p), end(m.get_table().end()) { increment(); } + void increment() { for (; p != end && m.empty_or_deleted(p->first); ++p) {} } + iterator &operator++() { + ++p; + increment(); + return *this; + } + iterator operator++(int) { + snap_map_iterator copy = *this; + ++*this; + return copy; + } + reference operator*() const { return *p; } + pointer operator->() const { return p; } + bool operator==(const iterator &it) const { return p == it.p; } + bool operator!=(const iterator &it) const { return p != it.p; } + }; + + // XXX snapshotting is complete hack. based on fast_map. + /** + * Quadratic internal probing with triangular numbers, a la + * google::dense_hash_map. Took the overall design from + * google::dense_hash_map, and also a few speed-up techniques from peeking at + * their source. + * + * This was originally written for ydb, where the map was the bottleneck in + * the application, but more importantly, the interface required the ability + * to serialize the map, and partially. The internals have to be exposed in + * order for fast serialization (i.e. memcpy) to be possible; also, the + * ability to serialize just a certain range of the snap_map was required. + */ + template <typename Key, typename Data> + class snap_map + { + public: + typedef snap_map_traits<Key, Data> traits; + typedef typename traits::map_type map_type; + typedef typename traits::iterator iterator; + typedef typename traits::const_iterator const_iterator; + typedef typename traits::key_type key_type; + typedef typename traits::data_type data_type; + typedef typename traits::value_type value_type; + typedef typename traits::reference reference; + typedef typename traits::const_reference const_reference; + typedef typename traits::pointer pointer; + typedef typename traits::const_pointer const_pointer; + typedef map<size_t, unique_ptr<array<value_type> > > shadowmap; + + private: + static const size_t initsize = 1 << 20, pagesize = 1 << 15, pagemask = pagesize - 1; + commons::array<value_type> table; + key_type empty_key, deleted_key; + size_t count; + boost::hash<key_type> hasher; + shadowmap shadows; + bool has_empty_key, has_deleted_key, snapshot; + + void resize(size_t size) { + ASSERT((size & (size - 1)) == 0); + commons::array<value_type> newtab(0); + char *newtabarr = new char[size * sizeof(value_type)]; + newtab.reset(reinterpret_cast<value_type*>(newtabarr), size); + for (size_t i = 0; i < size; ++i) + newtab[i].first = empty_key; + // Rehash old values over into new table. + size_t mask = newtab.size() - 1; + for (size_t i = 0; i < table.size(); ++i) { + if (table[i].first != empty_key && table[i].first != deleted_key) { + // Don't want to simply reuse the general lookup, since we know + // we're starting with a blank slate. + size_t pos = hasher(table[i].first) & mask; + for (size_t probe = 0; + newtab[pos].first != empty_key; + pos = (pos + ++probe) & mask) { + ASSERT(probe < newtab.size()); + } + newtab[pos] = table[i]; + } + } + swap(newtab, table); + } + + void grow() { resize(table.size() << 1); } + void shrink() { resize(table.size() >> 1); } + void assert_init() const { ASSERT(has_empty_key && has_deleted_key); } + size_t page(size_t pos) { return pagemask & (pos / sizeof(value_type)); } + + public: + snap_map() : + table(0), empty_key(0), deleted_key(0), count(0), has_empty_key(false), + has_deleted_key(false), snapshot(false) {} + void set_empty_key(key_type k) { + has_empty_key = true; + empty_key = k; + resize(initsize); + } + void set_deleted_key(key_type k) { + has_deleted_key = true; + deleted_key = k; + resize(initsize); + } + size_t size() const { return count; } + void set_size(size_t size) { count = size; } + void erase(iterator) { throw_not_implemented(); } + array<value_type> &get_table() { return table; } + const array<value_type> &get_table() const { return table; } + bool empty_or_deleted(key_type k) const { + return k == empty_key || k == deleted_key; + } + + iterator begin() { + assert_init(); + if (count == 0) return end(); + else return iterator(*this, table.begin()); + } + const_iterator begin() const { + assert_init(); + if (count == 0) return end(); + else return const_iterator(*this, table.begin()); + } + + iterator end() { + assert_init(); + return iterator(*this, table.end()); + } + const_iterator end() const { + assert_init(); + return const_iterator(*this, table.end()); + } + + const_iterator find(key_type k) const { + assert_init(); + size_t probe = 0, mask = table.size() - 1, pos = hasher(k) & mask; + while (true) { + if (table[pos].first == empty_key) return end(); + if (table[pos].first == k) return const_iterator(*this, &table[pos]); + pos = (pos + ++probe) & mask; + ASSERT(probe < table.size()); + } + } + + value_type &get(size_t pos) { + typename shadowmap::iterator it = shadows.find(page(pos)); + value_type &v = it == shadows.end() ? table[pos] : (*(it->second))[pos]; + return v; + } + + iterator find(key_type k) { + assert_init(); + size_t probe = 0, mask = table.size() - 1, pos = hasher(k) & mask; + if (snapshot) { + while (true) { + value_type &v = get(pos); + if (v.first == empty_key) return end(); + if (v.first == k) return iterator(*this, &v); + pos = (pos + ++probe) & mask; + ASSERT(probe < table.size()); + } + } else { + while (true) { + if (table[pos].first == empty_key) return end(); + if (table[pos].first == k) return iterator(*this, &table[pos]); + pos = (pos + ++probe) & mask; + ASSERT(probe < table.size()); + } + } +#if 0 + for (; + table[pos].first != empty_key && table[pos].first != k; + pos = (pos + ++probe) & mask) { + ASSERT(probe < table.size()); + } + if (table[pos].first == empty_key) return end(); + else return &table[pos]; +#endif + } + + data_type &operator[](key_type k) { + assert_init(); + size_t probe = 0, mask = table.size() - 1, pos = hasher(k) & mask; + // Lookup. Skip over deleted entries (but remembering the first one + // seen as an open slot if we later decide to insert). If we find an + // empty spot, try returning the earlier deleted spot first. If we + // find the key, return that. + for (; + table[pos].first != deleted_key && + table[pos].first != empty_key && + table[pos].first != k; + pos = (pos + ++probe) & mask) { + ASSERT(probe < table.size()); + } + if (table[pos].first == deleted_key) { + size_t first_deleted = pos; + for (; + table[pos].first != empty_key && table[pos].first != k; + pos = (pos + ++probe) & mask) { + ASSERT(probe < table.size()); + } + if (table[pos].first == empty_key) { + // Inserting new value_type. Grow table if necessary. + if (++count > table.size() * 3 / 4) { + --count; + grow(); + return (*this)[k]; + } + pos = first_deleted; + table[pos].first = k; + } + } else if (table[pos].first == empty_key) { + // Inserting new value_type. Grow table if necessary. + if (++count > table.size() * 3 / 4) { + --count; + grow(); + return (*this)[k]; + } + table[pos].first = k; + } + return table[pos].second; + } + }; + +} + +#endif Property changes on: cpp-commons/trunk/src/commons/snap_map.h ___________________________________________________________________ Added: svn:mergeinfo + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |