[Assorted-commits] SF.net SVN: assorted: [380] hash-join/trunk/src/hashjoin.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-11 23:23:00
|
Revision: 380
          http://assorted.svn.sourceforge.net/assorted/?rev=380&view=rev
Author:   yangzhang
Date:     2008-02-11 15:23:00 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
added resize notices; fixed push_bucket

Modified Paths:
--------------
    hash-join/trunk/src/hashjoin.cc

Modified: hash-join/trunk/src/hashjoin.cc
===================================================================
--- hash-join/trunk/src/hashjoin.cc 2008-02-11 23:19:42 UTC (rev 379)
+++ hash-join/trunk/src/hashjoin.cc 2008-02-11 23:23:00 UTC (rev 380)
@@ -31,9 +31,20 @@
 using namespace commons;
 // TODO: using namespace boost;
 
+typedef hash_map<const char*, const void*, hash<const char*>, eqstr> my_hash_map;
+class hmap : public my_hash_map
+{
+public:
+  void
+    resize(size_type hint)
+    {
+      cout << "resizing " << this << " to " << hint << endl;
+      my_hash_map::resize(hint);
+    }
+};
+
 // TODO use dependency injection!
 unsigned int ncpus = 1;
-typedef hash_map<const char *, const void *, hash<const char *>, eqstr> hmap;
 const hmap::size_type map_size = 10000000;
 
 /**
@@ -68,7 +79,10 @@
 class db
 {
 public:
-  db(const char *path) : buf(load_file(path, buflen, ncpus)) {}
+  db(const char *path) : buf(load_file(path, buflen, ncpus))
+  {
+    scan(buf, buflen);
+  }
   /**
    * Run hash-partitioning phase on all processors.
    */
@@ -237,8 +251,8 @@
 db::push_bucket(char **heads, bucket *bs, const char *s, const char *p, size_t nbytes)
 {
   size_t h = hash_djb2(s);
-  unsigned int bucket = h % (map_size * ncpus) / map_size;
-  size_t bucket_size = max(1000000UL,buflen / ncpus * 3);
+  unsigned int bucket = h % ncpus;
+  size_t bucket_size = max(1000000UL, buflen * 3 / ncpus);
   if (heads[bucket] + nbytes < bs[bucket].bufs.back() + bucket_size) {
     memcpy(heads[bucket], p, nbytes);
     heads[bucket] += nbytes;
@@ -369,6 +383,7 @@
 movdb::build1(unsigned int pid, const bucket **movbucs, hmap *ph)
 {
   hmap &h = *ph;
+  h.resize(map_size);
   // Visit each bucket that's destined to us (visit each source).
   for (unsigned int i = 0; i < ncpus; i++) {
     const vector<char*>& bufs = movbucs[i][pid].bufs;
@@ -434,7 +449,7 @@
         hits++;
         join(title, name);
       } else {
-        if (misses == 0) cerr << "MISS: '" << title << '\'' << endl;
+        if (misses == 0) cerr << "MISS: '" << title << '\'' << endl;
         misses++;
       }
       // End of a tuple? (Don't actually need this check, since the

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|