[Assorted-commits] SF.net SVN: assorted: [327] hash-join/trunk/src/hashjoin.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-06 16:19:47
|
Revision: 327
          http://assorted.svn.sourceforge.net/assorted/?rev=327&view=rev
Author:   yangzhang
Date:     2008-02-06 08:19:42 -0800 (Wed, 06 Feb 2008)

Log Message:
-----------
fixed the obscene replication

Modified Paths:
--------------
    hash-join/trunk/src/hashjoin.cc

Modified: hash-join/trunk/src/hashjoin.cc
===================================================================
--- hash-join/trunk/src/hashjoin.cc 2008-02-06 16:01:30 UTC (rev 326)
+++ hash-join/trunk/src/hashjoin.cc 2008-02-06 16:19:42 UTC (rev 327)
@@ -204,22 +204,23 @@
 int main(int argc, char *argv[])
 {
-  if (argc != 3) {
-    fprintf(stderr, "hashjoin <ncpus> <actresses>\n");
+  if (argc != 4) {
+    fprintf(stderr, "hashjoin <ncpus> <movies> <actresses>\n");
     exit(1);
   }
   ncpus = atoi(argv[1]);
-  const char *actresses = argv[2];
+  const char *movies = argv[2];
+  const char *actresses = argv[3];
   timer t("main time: ");
   cout << "loading movies" << endl;
-  movdb mdb("../movie-data/movies.dat");
+  movdb mdb(movies); // "../movie-data/movies.dat"
   t.print();
   cout << "loading actresses" << endl;
-  actdb adb(actresses);//"../movie-data/mdactresses.dat");
+  actdb adb(actresses); // "../movie-data/mdactresses.dat"
   t.print();
   cout << "hash-partitioning movies into per-core buckets" << endl;
@@ -409,6 +410,7 @@
   for (unsigned int i = 0; i < ncpus; i++) {
     heads[i] = bs[i].bufs[0];
   }
+  char tmp[1024];
   // Statistics (TODO dynamic allocation)
   int counter = 0, mincount = INT_MAX;
@@ -416,15 +418,20 @@
   while (p < end - 999) {
     char *name = p;
     p = strchr(p, '\0') + 1;
+    strcpy(tmp, name);
+    char *subtmp = tmp + strlen(name) + 1;
     char *tuple_end = unsafe_strstr(p, "\0\0", end) + 2;
     while (true) {
       char *title = p;
       p = strchr(p, '\0') + 1;
+      strcpy(subtmp, title);
+      size_t strl = strlen(subtmp);
+
       // Copy this line into the correct local bucket.
       //cout << "hashing " << title << endl;
       unsigned int bbb;
-      if (-1 != (bbb = push_bucket(heads, bs, title, name, tuple_end - name))) {
+      if (-1 != (bbb = push_bucket(heads, bs, title, tmp, subtmp + strl + 1 - tmp))) {
         //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus;
         //int bucket_size = max(1000000UL,buflen / ncpus * 3);
         //cout << "FUCK " << heads[0] - bs[0].buf << " " << bucket_size << " " << heads[1] - bs[1].buf << " " << p - title << endl;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.