[Assorted-commits] SF.net SVN: assorted: [328] hash-join/trunk/src/hashjoin.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-06 17:09:49
|
Revision: 328
          http://assorted.svn.sourceforge.net/assorted/?rev=328&view=rev
Author:   yangzhang
Date:     2008-02-06 09:09:53 -0800 (Wed, 06 Feb 2008)

Log Message:
-----------
fixed some bugs

Modified Paths:
--------------
    hash-join/trunk/src/hashjoin.cc

Modified: hash-join/trunk/src/hashjoin.cc
===================================================================
--- hash-join/trunk/src/hashjoin.cc 2008-02-06 16:19:42 UTC (rev 327)
+++ hash-join/trunk/src/hashjoin.cc 2008-02-06 17:09:53 UTC (rev 328)
@@ -338,7 +338,7 @@
   //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus;
   int bucket_size = max(1000000UL,buflen / ncpus * 3);
   if (heads[bucket] + nbytes < bs[bucket].bufs.back() + bucket_size) {
-    memcpy(heads[bucket], s, nbytes);
+    memcpy(heads[bucket], p, nbytes);
     heads[bucket] += nbytes;
     return -1;
   } else {
@@ -390,7 +390,7 @@
   for (unsigned int i = 0; i < ncpus; i++) {
     bs[i].sz.back() = heads[i] - bs[i].bufs.back();
   }
-  cout << "movie count " << counter << " vs " << bs[0].sz.back()<< endl;
+  cout << "movie count " << counter << " nbytes " << bs[0].sz.back()<< endl;
 }
 
 void
@@ -415,7 +415,7 @@
   // Statistics (TODO dynamic allocation)
   int counter = 0, mincount = INT_MAX;
   char *p = partstart, *end = partend;
-  while (p < end - 999) {
+  while (p < end) {
     char *name = p;
     p = strchr(p, '\0') + 1;
     strcpy(tmp, name);
@@ -426,12 +426,14 @@
       p = strchr(p, '\0') + 1;
       strcpy(subtmp, title);
 
-      size_t strl = strlen(subtmp);
+      size_t tmplen = subtmp + strlen(subtmp) + 2 - tmp;
+      check(tmplen < 1024);
+      tmp[tmplen-1] = '\0';
 
       // Copy this line into the correct local bucket.
       //cout << "hashing " << title << endl;
       unsigned int bbb;
-      if (-1 != (bbb = push_bucket(heads, bs, title, tmp, subtmp + strl + 1 - tmp))) {
+      if (-1 != (bbb = push_bucket(heads, bs, title, tmp, tmplen))) {
         //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus;
         //int bucket_size = max(1000000UL,buflen / ncpus * 3);
         //cout << "FUCK " << heads[0] - bs[0].buf << " " << bucket_size << " " << heads[1] - bs[1].buf << " " << p - title << endl;
@@ -453,7 +455,7 @@
   for (unsigned int i = 0; i < ncpus; i++) {
     bs[i].sz.back() = heads[i] - bs[i].bufs.back();
   }
-  cout << "actress count " << counter << " vs " << bs[0].sz.back()<< endl;
+  cout << "actress count " << counter << " nbytes " << bs[0].sz.back()<< endl;
 }
 
 const hmap *
@@ -514,23 +516,24 @@
 actdb::probe1(unsigned int pid, const hmap *hh, const bucket **actbucs)
 {
   const hmap &h = *hh;
+  int hits = 0, misses = 0;
   for (unsigned int i = 0; i < ncpus; i++) {
     char *p = actbucs[i][pid].bufs[0],
          *end = actbucs[i][pid].bufs[0] + actbucs[i][pid].sz[0];
-    int hits = 0, misses = 0;
     while (p < end) {
       char *name = p;
       p = strchr(p, '\0') + 1;
       while (true) {
         char *title = p;
         p = strchr(p, '\0') + 1;
-        // cout << "name " << name << "title: " << title << p - title << endl;
+        //cout << "name " << name << " title: " << title << p - title << endl;
         // Emit any joined tuple.
         if (h.find(title) != h.end()) {
           //cout << " HIT" << endl;
           hits++;
           join(title, name);
         } else {
+          //cout << " MISS" << endl;
           misses++;
         }
         // End of tuple?
@@ -540,9 +543,8 @@
         }
       }
     }
-    //cout << "cpu " << pid << " src " << i << " hits " << hits << " misses " <<
-    //misses << endl;
   }
+  cout << "cpu " << pid << " hits " << hits << " misses " << misses << endl;
 }
 
 // vim:et:sw=2:ts=2

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.