assorted-commits Mailing List for Assorted projects (Page 63)
Brought to you by: yangzhang
From: <yan...@us...> - 2008-02-10 18:37:28
|
Revision: 352 http://assorted.svn.sourceforge.net/assorted/?rev=352&view=rev Author: yangzhang Date: 2008-02-10 10:37:27 -0800 (Sun, 10 Feb 2008) Log Message: ----------- added numa-bench Added Paths: ----------- numa-bench/ numa-bench/trunk/ numa-bench/trunk/README numa-bench/trunk/src/ numa-bench/trunk/src/Makefile numa-bench/trunk/src/avail.cc numa-bench/trunk/src/cache.cc numa-bench/trunk/src/malloc.cc numa-bench/trunk/src/thrash.cc Added: numa-bench/trunk/README =================================================================== --- numa-bench/trunk/README (rev 0) +++ numa-bench/trunk/README 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,6 @@ +This is an assortment of microbenchmarks for understanding the performance +behavior of NUMA systems and for exploring the Linux NUMA API. + +Revelant materials include the [libnuma whitepaper]. + +[libnuma whitepaper]: http://www.novell.com/collateral/4621437/4621437.pdf Added: numa-bench/trunk/src/Makefile =================================================================== --- numa-bench/trunk/src/Makefile (rev 0) +++ numa-bench/trunk/src/Makefile 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,15 @@ +COMMONS := $(wildcard commons/*.h) +CXX = g++ -I. -lnuma -o $@ $^ + +all: avail cache + +avail: avail.cc $(COMMONS) + $(CXX) + +cache: cache.cc $(COMMONS) + $(CXX) + +clean: + rm -f avail cache + +.PHONY: clean Added: numa-bench/trunk/src/avail.cc =================================================================== --- numa-bench/trunk/src/avail.cc (rev 0) +++ numa-bench/trunk/src/avail.cc 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,7 @@ +#include <numa.h> + +int +main() +{ + return numa_available() < 0; +} Added: numa-bench/trunk/src/cache.cc =================================================================== --- numa-bench/trunk/src/cache.cc (rev 0) +++ numa-bench/trunk/src/cache.cc 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,15 @@ +#include <iostream> +#include <commons/cpuid.h> + +using namespace std; +using namespace commons; + +int +main() +{ + unsigned char iline, dline; + cache_line_sizes_amd(&iline, &dline); + cout << "AMD: " << (unsigned int) iline << ' ' << (unsigned int) dline << endl; + cout << "Intel: " << cache_line_sizes_intel() << endl; + return 0; +} Added: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc (rev 0) +++ numa-bench/trunk/src/malloc.cc 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,33 @@ +// Does malloc tend to allocate locally? 
+ +#include <cstdlib> +#include <iostream> +#include <time.h> +#include <pthread.h> + +using namespace std; + +const size_t size = 10000000; + +void +touch(void *pp) +{ + char *p = (char*) pp; + const int reps = 100; + time_t t0 = time(NULL); + for (int c = 0; c < reps; c++) { + for (size_t i = 0; i < size; i++) { + p[i] = i; + } + } + time_t t1 = time(NULL); + cout << t1 - t0 << endl; +} + +int +main() +{ + void *p = malloc(size); + touch(p); + return 0; +} Added: numa-bench/trunk/src/thrash.cc =================================================================== --- numa-bench/trunk/src/thrash.cc (rev 0) +++ numa-bench/trunk/src/thrash.cc 2008-02-10 18:37:27 UTC (rev 352) @@ -0,0 +1,38 @@ +#include <iostream> +#include <pthread.h> +#include <unistd.h> + +using namespace std; + +const int size = 100000000; + +void* +f(void* p) +{ + int *xs = (int*) p; + for (int i = 0; i < size; i++) { + xs[i] = i; + } + cout << "hello, world" << endl; +} + +int +main() +{ + const int n = 16; + int *xs = new int[size]; + + pthread_attr_t a; + pthread_attr_init(&a); + + pthread_t t[n]; + for (int i = 0; i < n; i++) { + pthread_create(&t[i], &a, &f, xs); + } + + void *x; + for (int i = 0; i < n; i++) { + pthread_join(t[i], &x); + } + return 0; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
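The malloc.cc benchmark above times only a plain malloc'd buffer. As a companion, here is a minimal sketch (not part of the commit) of how the libnuma calls described in the whitepaper could place buffers on explicit nodes for the same touch loop; the node numbers and the assumption of a machine with more than one node are illustrative, and it builds with -lnuma like the other targets.

// Sketch: compare touch times for node-local, remote, and plain malloc'd
// buffers. Node 0 and numa_max_node() are illustrative choices.
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <numa.h>

const size_t size = 10000000;

// Touch every byte repeatedly and report elapsed wall-clock seconds.
void touch(char *p, const char *label)
{
  time_t t0 = time(NULL);
  for (int rep = 0; rep < 100; rep++)
    for (size_t i = 0; i < size; i++)
      p[i] = (char) i;
  printf("%s: %ld s\n", label, (long) (time(NULL) - t0));
}

int
main()
{
  if (numa_available() < 0) return 1;   // no NUMA support on this kernel
  numa_run_on_node(0);                  // pin the calling thread to node 0

  char *local  = (char *) numa_alloc_onnode(size, 0);
  char *remote = (char *) numa_alloc_onnode(size, numa_max_node());
  char *heap   = (char *) malloc(size);

  touch(local,  "node 0 (local)");
  touch(remote, "last node (remote)");
  touch(heap,   "malloc");

  numa_free(local, size);
  numa_free(remote, size);
  free(heap);
  return 0;
}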
From: <yan...@us...> - 2008-02-10 18:36:30
|
Revision: 351 http://assorted.svn.sourceforge.net/assorted/?rev=351&view=rev Author: yangzhang Date: 2008-02-10 10:36:33 -0800 (Sun, 10 Feb 2008) Log Message: ----------- imported cpuid.h from numa-bench Modified Paths: -------------- cpp-commons/trunk/src/commons/cppcommons.cpp Added Paths: ----------- cpp-commons/trunk/src/commons/cpuid.h Modified: cpp-commons/trunk/src/commons/cppcommons.cpp =================================================================== --- cpp-commons/trunk/src/commons/cppcommons.cpp 2008-02-10 18:28:56 UTC (rev 350) +++ cpp-commons/trunk/src/commons/cppcommons.cpp 2008-02-10 18:36:33 UTC (rev 351) @@ -24,6 +24,7 @@ #endif #include "commons/check.h" +#include "commons/cpuid.h" #include "commons/files.h" #include "commons/strings.h" #include "commons/time.h" Added: cpp-commons/trunk/src/commons/cpuid.h =================================================================== --- cpp-commons/trunk/src/commons/cpuid.h (rev 0) +++ cpp-commons/trunk/src/commons/cpuid.h 2008-02-10 18:36:33 UTC (rev 351) @@ -0,0 +1,71 @@ +// +// This library would be a straightforward target for auto-generation from a +// spec. +// + +#ifndef _COMMONS_CPUID_H +#define _COMMONS_CPUID_H + +namespace commons +{ + + enum { + CACHE = 2, + CACHE_LINE_SIZES = 0x80000005 + }; + +#define cpuid(func,ax,bx,cx,dx)\ + __asm__ __volatile__ ("cpuid":\ + "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func)); + + /** + * Given an extended general-purpose register (e.g. EAX), extract the high + * 8-bit register (AH). + */ + inline unsigned char + high(unsigned int r) + { + return ((r >> 8) & 0xffU); + } + + /** + * Given an extended general-purpose register (e.g. EAX), extract the low + * 8-bit register (AL). + */ + inline unsigned char + low(unsigned int r) + { + return (r & 0xffU); + } + + /** + * Get cache line size in bytes on an Intel CPU. + * References: + * http://softpixel.com/~cwright/programming/simd/cpuid.php + * http://www.intel.com/software/products/documentation/vlin/mergedprojects/analyzer_ec/mergedprojects/reference_olh/mergedprojects/instructions/instruct32_hh/vc46.htm + */ + inline unsigned short + cache_line_sizes_intel() + { + unsigned int a, b, c, d; + cpuid(1, a, b, c, d); + return (unsigned short) (high(b) * 8); + } + + /** + * Get cache line sizes on an AMD CPU. + * Reference: http://softpixel.com/~cwright/programming/simd/cpuid.php + * Maybe look at: http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25481.pdf + */ + inline void + cache_line_sizes_amd(unsigned char *iline, unsigned char *dline) + { + int a, b, c, d; + cpuid(CACHE_LINE_SIZES, a, b, c, d); + *dline = low(c); + *iline = low(d); + } + +} + +#endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
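cache.cc in r352 above calls both the AMD and the Intel helper unconditionally. A possible refinement, sketched here against the commons/cpuid.h shown in this commit (not code from the repository), reads the vendor string from CPUID leaf 0 first (leaf 0 returns the 12-byte vendor string in EBX, EDX, ECX) and then dispatches:

#include <cstring>
#include <iostream>
#include <commons/cpuid.h>

using namespace std;
using namespace commons;

int
main()
{
  unsigned int a, b, c, d;
  char vendor[13] = {0};

  // CPUID leaf 0: the vendor string is returned in EBX, EDX, ECX.
  cpuid(0, a, b, c, d);
  memcpy(vendor + 0, &b, 4);
  memcpy(vendor + 4, &d, 4);
  memcpy(vendor + 8, &c, 4);

  if (strcmp(vendor, "AuthenticAMD") == 0) {
    unsigned char iline, dline;
    cache_line_sizes_amd(&iline, &dline);
    cout << "AMD line sizes: " << (unsigned int) iline << ' '
         << (unsigned int) dline << endl;
  } else if (strcmp(vendor, "GenuineIntel") == 0) {
    cout << "Intel line size: " << cache_line_sizes_intel() << endl;
  } else {
    cout << "unrecognized vendor: " << vendor << endl;
  }
  return 0;
}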
From: <yan...@us...> - 2008-02-10 18:28:51
|
Revision: 350 http://assorted.svn.sourceforge.net/assorted/?rev=350&view=rev Author: yangzhang Date: 2008-02-10 10:28:56 -0800 (Sun, 10 Feb 2008) Log Message: ----------- moved to cpp-commons Modified Paths: -------------- hash-join/trunk/src/Makefile hash-join/trunk/src/hashjoin.cc Removed Paths: ------------- hash-join/trunk/src/method_thread1.h Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-10 18:28:49 UTC (rev 349) +++ hash-join/trunk/src/Makefile 2008-02-10 18:28:56 UTC (rev 350) @@ -1,16 +1,17 @@ TARGET := hashjoin -SRCS := hashjoin.cc +SRCS := hashjoin.cc $(wildcard commons/*.h) ### begin common makefrag -CFLAGS := -Wall -lpthread # -lprofiler +CFLAGS := -I. -Wall -lpthread # -lprofiler CXX = g++ $(CFLAGS) -o $@ $^ -all: opt dbg pg +all: pg dbg: $(TARGET)-dbg opt: $(TARGET)-opt pg: $(TARGET)-pg +goo: $(TARGET)-goo $(TARGET)-pg: $(SRCS) $(CXX) -g -pg Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-10 18:28:49 UTC (rev 349) +++ hash-join/trunk/src/hashjoin.cc 2008-02-10 18:28:56 UTC (rev 350) @@ -12,178 +12,19 @@ #include <fcntl.h> #include <pthread.h> -#include "method_thread1.h" +#include <commons/check.h> +#include <commons/files.h> +#include <commons/strings.h> +#include <commons/threads.h> +#include <commons/time.h> // -// C++ Commons :: NUMA -// - -using namespace std; - -// TODO: Figure out how to create an exception with a useful message. -inline void -_check(bool cond, const char *msg, const char *file, int line) -{ - if (!cond) { - throw exception(); - } -} - -#define check(cond) _check(cond, NULL, __FILE__, __LINE__) - -/** - * Similar to assert(), but is not conditionally compiled, so this is safe to - * use as a guard against expected failures (such as checking return codes). - */ -#define checkmsg(cond, msg) \ - bool b = cond; \ - if (!b) _check(b, (msg), __FILE__, __LINE__) - -/** - * Search in p for the nth instance of c and return the character past it. - */ -inline const char * -strchrrep(const char *p, char c, int n) -{ - for (int i = 0; i < n; i++) { - p = strchr(p, c); - check(p); - p++; - } - return p; -} - -/** - * Search in p for the nth instance of c and return the character past it. - */ -inline char * -strchrrep(char *p, char c, int n) -{ - return const_cast<char *>(strchrrep(const_cast<const char *>(p), c, n)); -} - -/** - * Get the current time in milliseconds. - */ -inline long long -current_time_millis() -{ - long long t; - struct timeval tv; - - gettimeofday(&tv, 0); - - t = tv.tv_sec; - t = (t *1000) + (tv.tv_usec/1000); - - return t; -} - -/** - * Convenience class for performing wall-clock benchmarking. - */ -class timer -{ -public: - timer(const string label) : - label(label), start(current_time_millis()), last(start) {} - void print() - { - long long now = current_time_millis(); - cout << label << now - last << endl; - last = now; - } -private: - const string label; - long long start, last; -}; - -/** - * A functor that checks for string equality. Mainly useful as a template - * parameter to the hash data structures in STL extensions. - */ -struct eqstr -{ - bool operator()(const char* s1, const char* s2) const - { - return strcmp(s1, s2) == 0; - } -}; - -/** - * Look for a substring, but without null-termination conventions. 
- */ -inline char * -unsafe_strstr(char *p, const char *q, const char *lim) -{ - if (lim == 0) { - while (true) { - for (; !(*p == '\0' && *(p+1) == '\0'); p++); - return p; - } - } else { - check(p < lim); - while (true) { - for (; !(*p == '\0' && *(p+1) == '\0') && p < lim; p++); - if (p == lim) return NULL; - return p; - } - } -} - -/** - * Look for a substring, but without null-termination conventions. - */ -inline const char* -unsafe_strstr(const char *p, const char *q, const char *lim) -{ - return unsafe_strstr((char*) p, q, lim); -} - -/** - * Load an entire file into buf and also give us the length of the buffer. - * TODO this probably isn't very safe, since we're demoting an off_t to a - * size_t. Is there a healthier approach? - */ -char * -load_file(const char *path, size_t & len, unsigned int ncpus) { - struct stat sb; - int fd; - - fd = open(path, 0); - check(fd >= 0); - - check(fstat(fd, &sb) == 0); - check(sb.st_size <= 0xffffffff); - - // TODO Why don't we need (static) cast here? Isn't this a lossy cast? - len = sb.st_size; - - char *buf = new char[len + 1]; - check(buf); - - // TODO Use threads to pull data to the correct initial locations? - size_t chunk_len = len / ncpus; - for (unsigned int i = 0; i < ncpus; i++) { - int off = i *chunk_len; - ssize_t status = pread(fd, buf + off, chunk_len, off); - // We read the whole chunk or hit the end. - size_t nread = static_cast<ssize_t>(status); - check(status != -1 && (nread == chunk_len || off + nread == len)); - } - - check(close(fd) == 0); - - buf[len] = '\0'; // don't let strcmp() run off the end - return buf; -} - -// // Hash Join // using namespace std; using namespace __gnu_cxx; +using namespace commons; // TODO use dependency injection! unsigned int ncpus = 1; @@ -361,7 +202,7 @@ // into bucket[i][j]. pthread_t ts[ncpus]; for (unsigned int i = 0; i < ncpus; i++) { - ts[i] = method_thread1(this, &db::partition1, i, buckets[i]); + ts[i] = method_thread(this, &db::partition1, i, buckets[i]); } for (unsigned int i = 0; i < ncpus; i++) { void *value; @@ -501,7 +342,7 @@ pthread_t ts[ncpus]; hmap *hs = new hmap[ncpus]; for (unsigned int i = 0; i < ncpus; i++) { - ts[i] = method_thread1(this, &movdb::build1, i, movbucs, &hs[i]); + ts[i] = method_thread(this, &movdb::build1, i, movbucs, &hs[i]); } for (unsigned int i = 0; i < ncpus; i++) { void *value; @@ -533,7 +374,7 @@ { pthread_t ts[ncpus]; for (unsigned int i = 0; i < ncpus; i++) { - ts[i] = method_thread1(this, &actdb::probe1, i, &hs[i], actbucs); + ts[i] = method_thread(this, &actdb::probe1, i, &hs[i], actbucs); } for (unsigned int i = 0; i < ncpus; i++) { void *value; Deleted: hash-join/trunk/src/method_thread1.h =================================================================== --- hash-join/trunk/src/method_thread1.h 2008-02-10 18:28:49 UTC (rev 349) +++ hash-join/trunk/src/method_thread1.h 2008-02-10 18:28:56 UTC (rev 350) @@ -1,132 +0,0 @@ -#ifndef method_thread_h -#define method_thread_h - -#include <assert.h> -#include <pthread.h> - -// non-rpc-specific utility to start a thread that runs -// an object method. returns a pthread_t on success, and -// zero on error. 
-template <class C> pthread_t -method_thread1(C *o, void (C::*m)()) -{ - class XXX { - public: - C *o; - void (C::*m)(); - static void *yyy(void *vvv) { - XXX *x = (XXX*)vvv; - C *o = x->o; - void (C::*m)() = x->m; - delete x; - (o->*m)(); - return 0; - } - }; - XXX *x = new XXX; - x->o = o; - x->m = m; - pthread_t th; - if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ - return th; - } - return 0; -} - -template <class C, class A> pthread_t -method_thread1(C *o, void (C::*m)(A), A a) -{ - class XXX { - public: - C *o; - void (C::*m)(A a); - A a; - static void *yyy(void *vvv) { - XXX *x = (XXX*)vvv; - C *o = x->o; - void (C::*m)(A ) = x->m; - A a = x->a; - delete x; - (o->*m)(a); - return 0; - } - }; - XXX *x = new XXX; - x->o = o; - x->m = m; - x->a = a; - pthread_t th; - if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ - return th; - } - return 0; -} - -template <class C, class A1, class A2> pthread_t -method_thread1(C *o, void (C::*m)(A1 , A2 ), A1 a1, A2 a2) -{ - class XXX { - public: - C *o; - void (C::*m)(A1 a1, A2 a2); - A1 a1; - A2 a2; - static void *yyy(void *vvv) { - XXX *x = (XXX*)vvv; - C *o = x->o; - void (C::*m)(A1 , A2 ) = x->m; - A1 a1 = x->a1; - A2 a2 = x->a2; - delete x; - (o->*m)(a1, a2); - return 0; - } - }; - XXX *x = new XXX; - x->o = o; - x->m = m; - x->a1 = a1; - x->a2 = a2; - pthread_t th; - if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ - return th; - } - return 0; -} - -template <class C, class A1, class A2, class A3> pthread_t -method_thread1(C *o, void (C::*m)(A1 , A2, A3), A1 a1, A2 a2, A3 a3) -{ - class XXX { - public: - C *o; - void (C::*m)(A1 a1, A2 a2, A3 a3); - A1 a1; - A2 a2; - A3 a3; - static void *yyy(void *vvv) { - XXX *x = (XXX*)vvv; - C *o = x->o; - void (C::*m)(A1, A2, A3) = x->m; - A1 a1 = x->a1; - A2 a2 = x->a2; - A3 a3 = x->a3; - delete x; - (o->*m)(a1, a2, a3); - return 0; - } - }; - XXX *x = new XXX; - x->o = o; - x->m = m; - x->a1 = a1; - x->a2 = a2; - x->a3 = a3; - pthread_t th; - if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ - return th; - } - return 0; -} - -#endif This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
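For reference, a minimal usage sketch (not from the repository; the worker class is hypothetical) of the commons/threads.h method_thread helper that hashjoin.cc now uses in place of method_thread1: it starts a pthread running obj->member(args...) and returns the pthread_t, or 0 on failure.

#include <iostream>
#include <pthread.h>
#include <commons/threads.h>

using namespace std;
using namespace commons;

struct worker
{
  void run(int id) { cout << "worker " << id << endl; }
};

int
main()
{
  worker w;
  // Spawn w.run(7) on its own thread; the template deduces the argument type.
  pthread_t t = method_thread(&w, &worker::run, 7);
  if (t == 0) return 1;   // thread creation failed

  void *unused;
  pthread_join(t, &unused);
  return 0;
}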
From: <yan...@us...> - 2008-02-10 18:28:45
|
Revision: 349 http://assorted.svn.sourceforge.net/assorted/?rev=349&view=rev Author: yangzhang Date: 2008-02-10 10:28:49 -0800 (Sun, 10 Feb 2008) Log Message: ----------- imported code from hash-join; broke autotools Modified Paths: -------------- cpp-commons/trunk/src/Makefile.am Added Paths: ----------- cpp-commons/trunk/src/commons/ cpp-commons/trunk/src/commons/check.h cpp-commons/trunk/src/commons/cppcommons.cpp cpp-commons/trunk/src/commons/files.h cpp-commons/trunk/src/commons/strings.h cpp-commons/trunk/src/commons/threads.h cpp-commons/trunk/src/commons/time.h cpp-commons/trunk/src/test/ Removed Paths: ------------- cpp-commons/trunk/src/cppcommons.cpp cpp-commons/trunk/src/files.h cpp-commons/trunk/src/test.cpp Modified: cpp-commons/trunk/src/Makefile.am =================================================================== --- cpp-commons/trunk/src/Makefile.am 2008-02-10 04:04:44 UTC (rev 348) +++ cpp-commons/trunk/src/Makefile.am 2008-02-10 18:28:49 UTC (rev 349) @@ -6,8 +6,8 @@ # the library search path. cppcommons_LDFLAGS = $(all_libraries) lib_LIBRARIES = libcppcommons.a -cppcommons_SOURCES = test.cpp -libcppcommons_a_SOURCES = cppcommons.cpp files.h -noinst_HEADERS = files.h +cppcommons_SOURCES = test/files.cpp +libcppcommons_a_SOURCES = commons/cppcommons.cpp commons/files.h +noinst_HEADERS = commons/files.h AM_CXXFLAGS = -I/opt/boost-1.34.0/include/ cppcommons_LDADD = -lboost_filesystem-gcc41 Added: cpp-commons/trunk/src/commons/check.h =================================================================== --- cpp-commons/trunk/src/commons/check.h (rev 0) +++ cpp-commons/trunk/src/commons/check.h 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,47 @@ +#ifndef _COMMONS_CHECK_H +#define _COMMONS_CHECK_H + +#include <exception> +#include <sstream> +#include <string> + +namespace commons +{ + + using namespace std; + + class check_exception : exception + { + public: + check_exception(const string & name) : name(name) {} + virtual ~check_exception() throw() {} + private: + const string name; + }; + + inline void + _check(bool cond, const char *msg, const char *file, int line) + { + if (!cond) { + stringstream ss; + ss << file << ':' << line << ": "; + if (msg != NULL) ss << msg; + ss << endl; + throw check_exception(ss.str()); + } + } + +} + +#define check(cond) _check(cond, NULL, __FILE__, __LINE__) + + /** + * Similar to assert(), but is not conditionally compiled, so this is safe to + * use as a guard against expected failures (such as checking return codes). + */ +#define checkmsg(cond, msg) \ + bool b = cond; \ + if (!b) _check(b, (msg), __FILE__, __LINE__) + +#endif + Added: cpp-commons/trunk/src/commons/cppcommons.cpp =================================================================== --- cpp-commons/trunk/src/commons/cppcommons.cpp (rev 0) +++ cpp-commons/trunk/src/commons/cppcommons.cpp 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,31 @@ +/*************************************************************************** + * Copyright (C) 2007 by Yang Zhang * + * gmail:yaaang * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU Library General Public License as * + * published by the Free Software Foundation; either version 2 of the * + * License, or (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU Library General Public * + * License along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + ***************************************************************************/ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "commons/check.h" +#include "commons/files.h" +#include "commons/strings.h" +#include "commons/time.h" +#include "commons/threads.h" + Added: cpp-commons/trunk/src/commons/files.h =================================================================== --- cpp-commons/trunk/src/commons/files.h (rev 0) +++ cpp-commons/trunk/src/commons/files.h 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,90 @@ +#ifndef _COMMONS_FILES_H +#define _COMMONS_FILES_H + +#include <exception> +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include "commons/check.h" + +namespace commons +{ + + using namespace std; + + class file_not_found_exception : exception { + public: + file_not_found_exception(const string & name) : name(name) {} + virtual ~file_not_found_exception() throw() {} + private: + const string name; + }; + + /** + * Read in a whole file as a string. + */ + void read_file_as_string ( const string & name, string & out ) { + ifstream in ( name.c_str() ); + if (in.fail()) throw file_not_found_exception( name ); + out = string ( istreambuf_iterator<char> ( in ), istreambuf_iterator<char>() ); + } + + /** + * Read in a whole file as a vector of chars. + */ + void read_file_as_vector ( const string & name, vector<char> & out ) { + ifstream in ( name.c_str() ); + if ( in.fail() ) throw file_not_found_exception( name ); + out = vector<char> ( istreambuf_iterator<char> ( in ), istreambuf_iterator<char>() ); + } + + /** + * Load an entire file directly into buf and also give us the length of the + * buffer (size of the file). + * TODO this probably isn't very safe, since we're demoting an off_t to a + * size_t. Is there a healthier approach? + * TODO move to C99 commons + */ + char * + load_file(const char *path, size_t & len, unsigned int ncpus) { + struct stat sb; + int fd; + + fd = open(path, 0); + check(fd >= 0); + + check(fstat(fd, &sb) == 0); + check(sb.st_size <= 0xffffffff); + + // TODO Why don't we need (static) cast here? Isn't this a lossy cast? + len = sb.st_size; + + char *buf = new char[len + 1]; + check(buf); + + // TODO Use threads to pull data to the correct initial locations? + size_t chunk_len = len / ncpus; + for (unsigned int i = 0; i < ncpus; i++) { + int off = i *chunk_len; + ssize_t status = pread(fd, buf + off, chunk_len, off); + // We read the whole chunk or hit the end. 
+ size_t nread = static_cast<ssize_t>(status); + check(status != -1 && (nread == chunk_len || off + nread == len)); + } + + check(close(fd) == 0); + + buf[len] = '\0'; // don't let strcmp() run off the end + return buf; + } + +} + +#endif Added: cpp-commons/trunk/src/commons/strings.h =================================================================== --- cpp-commons/trunk/src/commons/strings.h (rev 0) +++ cpp-commons/trunk/src/commons/strings.h 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,82 @@ +// TODO: move whatever you can to C99 Commons + +#ifndef _COMMONS_STRINGS_H +#define _COMMONS_STRINGS_H + +#include <strings.h> + +#include "commons/check.h" + +namespace commons +{ + + using namespace std; + + /** + * Search in p for the nth instance of c and return the character past it. + */ + inline const char * + strchrrep(const char *p, char c, int n) + { + for (int i = 0; i < n; i++) { + p = strchr(p, c); + check(p); + p++; + } + return p; + } + + /** + * Search in p for the nth instance of c and return the character past it. + */ + inline char * + strchrrep(char *p, char c, int n) + { + return const_cast<char *>(strchrrep(const_cast<const char *>(p), c, n)); + } + + /** + * A functor that checks for string equality. Mainly useful as a template + * parameter to the hash data structures in STL extensions. + */ + struct eqstr + { + bool operator()(const char* s1, const char* s2) const + { + return strcmp(s1, s2) == 0; + } + }; + + /** + * Look for a substring, but without null-termination conventions. + */ + inline char * + unsafe_strstr(char *p, const char *q, const char *lim) + { + if (lim == 0) { + while (true) { + for (; !(*p == '\0' && *(p+1) == '\0'); p++); + return p; + } + } else { + check(p < lim); + while (true) { + for (; !(*p == '\0' && *(p+1) == '\0') && p < lim; p++); + if (p == lim) return NULL; + return p; + } + } + } + + /** + * Look for a substring, but without null-termination conventions. + */ + inline const char* + unsafe_strstr(const char *p, const char *q, const char *lim) + { + return unsafe_strstr((char*) p, q, lim); + } + +} + +#endif Added: cpp-commons/trunk/src/commons/threads.h =================================================================== --- cpp-commons/trunk/src/commons/threads.h (rev 0) +++ cpp-commons/trunk/src/commons/threads.h 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,138 @@ +// TODO: use boost::bind? + +#ifndef _COMMONS_THREADS_H +#define _COMMONS_THREADS_H + +#include <pthread.h> + +namespace commons +{ + + // non-rpc-specific utility to start a thread that runs + // an object method. returns a pthread_t on success, and + // zero on error. 
+ template <class C> pthread_t + method_thread(C *o, void (C::*m)()) + { + class XXX { + public: + C *o; + void (C::*m)(); + static void *yyy(void *vvv) { + XXX *x = (XXX*)vvv; + C *o = x->o; + void (C::*m)() = x->m; + delete x; + (o->*m)(); + return 0; + } + }; + XXX *x = new XXX; + x->o = o; + x->m = m; + pthread_t th; + if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ + return th; + } + return 0; + } + + template <class C, class A> pthread_t + method_thread(C *o, void (C::*m)(A), A a) + { + class XXX { + public: + C *o; + void (C::*m)(A a); + A a; + static void *yyy(void *vvv) { + XXX *x = (XXX*)vvv; + C *o = x->o; + void (C::*m)(A ) = x->m; + A a = x->a; + delete x; + (o->*m)(a); + return 0; + } + }; + XXX *x = new XXX; + x->o = o; + x->m = m; + x->a = a; + pthread_t th; + if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ + return th; + } + return 0; + } + + template <class C, class A1, class A2> pthread_t + method_thread(C *o, void (C::*m)(A1 , A2 ), A1 a1, A2 a2) + { + class XXX { + public: + C *o; + void (C::*m)(A1 a1, A2 a2); + A1 a1; + A2 a2; + static void *yyy(void *vvv) { + XXX *x = (XXX*)vvv; + C *o = x->o; + void (C::*m)(A1 , A2 ) = x->m; + A1 a1 = x->a1; + A2 a2 = x->a2; + delete x; + (o->*m)(a1, a2); + return 0; + } + }; + XXX *x = new XXX; + x->o = o; + x->m = m; + x->a1 = a1; + x->a2 = a2; + pthread_t th; + if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ + return th; + } + return 0; + } + + template <class C, class A1, class A2, class A3> pthread_t + method_thread(C *o, void (C::*m)(A1 , A2, A3), A1 a1, A2 a2, A3 a3) + { + class XXX { + public: + C *o; + void (C::*m)(A1 a1, A2 a2, A3 a3); + A1 a1; + A2 a2; + A3 a3; + static void *yyy(void *vvv) { + XXX *x = (XXX*)vvv; + C *o = x->o; + void (C::*m)(A1, A2, A3) = x->m; + A1 a1 = x->a1; + A2 a2 = x->a2; + A3 a3 = x->a3; + delete x; + (o->*m)(a1, a2, a3); + return 0; + } + }; + XXX *x = new XXX; + x->o = o; + x->m = m; + x->a1 = a1; + x->a2 = a2; + x->a3 = a3; + pthread_t th; + if(pthread_create(&th, NULL, &XXX::yyy, (void *) x) == 0){ + return th; + } + return 0; + } + +} + +#endif Added: cpp-commons/trunk/src/commons/time.h =================================================================== --- cpp-commons/trunk/src/commons/time.h (rev 0) +++ cpp-commons/trunk/src/commons/time.h 2008-02-10 18:28:49 UTC (rev 349) @@ -0,0 +1,54 @@ +#ifndef _COMMONS_TIME_H +#define _COMMONS_TIME_H + +#include <string> +#include <iostream> + +#include <sys/time.h> +#include <time.h> + +namespace commons +{ + + using namespace std; + + /** + * Get the current time in milliseconds. + * TODO: move to C99 Commons. + */ + inline long long + current_time_millis() + { + long long t; + struct timeval tv; + + gettimeofday(&tv, 0); + + t = tv.tv_sec; + t = (t *1000) + (tv.tv_usec/1000); + + return t; + } + + /** + * Convenience class for performing wall-clock benchmarking. 
+ */ + class timer + { + public: + timer(const string label) : + label(label), start(current_time_millis()), last(start) {} + void print() + { + long long now = current_time_millis(); + cout << label << now - last << endl; + last = now; + } + private: + const string label; + long long start, last; + }; + +} + +#endif Deleted: cpp-commons/trunk/src/cppcommons.cpp =================================================================== --- cpp-commons/trunk/src/cppcommons.cpp 2008-02-10 04:04:44 UTC (rev 348) +++ cpp-commons/trunk/src/cppcommons.cpp 2008-02-10 18:28:49 UTC (rev 349) @@ -1,27 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2007 by Yang Zhang * - * gmail:yaaang * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU Library General Public License as * - * published by the Free Software Foundation; either version 2 of the * - * License, or (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU Library General Public * - * License along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - ***************************************************************************/ - - -#ifdef HAVE_CONFIG_H -#include <config.h> -#endif - -#include "files.h" - Deleted: cpp-commons/trunk/src/files.h =================================================================== --- cpp-commons/trunk/src/files.h 2008-02-10 04:04:44 UTC (rev 348) +++ cpp-commons/trunk/src/files.h 2008-02-10 18:28:49 UTC (rev 349) @@ -1,46 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2007 by Yang Zhang * - * gmail:yaaang * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU Library General Public License as * - * published by the Free Software Foundation; either version 2 of the * - * License, or (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU Library General Public * - * License along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
* - ***************************************************************************/ -#include <vector> -#include <iostream> -#include <fstream> -#include <string> -#include <exception> - -using namespace std; - -class file_not_found_exception : exception { - public: - file_not_found_exception(const string & name) : name(name) {} - virtual ~file_not_found_exception() throw() {} - private: - const string name; -}; - -void read_file_as_string ( const string & name, string & out ) { - ifstream in ( name.c_str() ); - if (in.fail()) throw file_not_found_exception( name ); - out = string ( istreambuf_iterator<char> ( in ), istreambuf_iterator<char>() ); -} - -void read_file_as_vector ( const string & name, vector<char> & out ) { - ifstream in ( name.c_str() ); - if ( in.fail() ) throw file_not_found_exception( name ); - out = vector<char> ( istreambuf_iterator<char> ( in ), istreambuf_iterator<char>() ); -} Deleted: cpp-commons/trunk/src/test.cpp =================================================================== --- cpp-commons/trunk/src/test.cpp 2008-02-10 04:04:44 UTC (rev 348) +++ cpp-commons/trunk/src/test.cpp 2008-02-10 18:28:49 UTC (rev 349) @@ -1,37 +0,0 @@ -/*************************************************************************** - * Copyright (C) 2007 by Yang Zhang * - * gmail:yaaang * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU Library General Public License as * - * published by the Free Software Foundation; either version 2 of the * - * License, or (at your option) any later version. * - * * - * This program is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU General Public License for more details. * - * * - * You should have received a copy of the GNU Library General Public * - * License along with this program; if not, write to the * - * Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - ***************************************************************************/ - -#include "files.h" - -#include <cstdlib> -#include <boost/filesystem.hpp> - -using namespace boost::filesystem; - -int main ( int argc, char *argv[] ) { - cout << "Hello, world!" << current_path() << endl; - - for ( int i = 0; i < 10; i++ ) { - vector<char> v; - read_file_as_vector("codex.umz", v); - } - - return EXIT_SUCCESS; -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
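A short hypothetical driver (assuming only the headers added in this commit) showing how commons/check.h and commons/time.h fit together: check() throws a check_exception carrying file and line instead of aborting, and timer prints its label followed by the elapsed milliseconds.

#include <fstream>
#include <iostream>
#include <string>
#include <commons/check.h>
#include <commons/time.h>

using namespace std;
using namespace commons;

int
main(int argc, char **argv)
{
  timer t("elapsed ms: ");
  try {
    check(argc > 1);          // guard: a file argument is required
    ifstream in(argv[1]);
    check(!in.fail());        // guard: the file must open
    string line;
    size_t n = 0;
    while (getline(in, line)) n++;
    cout << n << " lines" << endl;
  } catch (const check_exception &) {
    cerr << "check failed" << endl;
    return 1;
  }
  t.print();                  // milliseconds since construction
  return 0;
}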
From: <yan...@us...> - 2008-02-10 04:04:40
|
Revision: 348 http://assorted.svn.sourceforge.net/assorted/?rev=348&view=rev Author: yangzhang Date: 2008-02-09 20:04:44 -0800 (Sat, 09 Feb 2008) Log Message: ----------- added bunch of stuff to Collections Modified Paths: -------------- scala-commons/trunk/src/commons/Collections.scala Modified: scala-commons/trunk/src/commons/Collections.scala =================================================================== --- scala-commons/trunk/src/commons/Collections.scala 2008-02-10 03:54:41 UTC (rev 347) +++ scala-commons/trunk/src/commons/Collections.scala 2008-02-10 04:04:44 UTC (rev 348) @@ -330,11 +330,20 @@ def str(xs: Iterable[Char]) = xs mkString "" /** - * Return an infinite stream, where each element evaluates gen. + * Return an infinite stream, where each element is an evaluation of gen. */ def repeat[a](gen: => a): Stream[a] = Stream.cons(gen, repeat(gen)) /** + * Return a stream of n elements, where each element is an evaluation of gen. + * Note that gen may be evaluated more than n times. See: + * + * "How to add laziness to a strict language, without even being odd" + * http://homepages.inf.ed.ac.uk/wadler/papers/lazyinstrict/lazyinstrict.ps + */ + def replicate[a](n: Int, gen: => a): Stream[a] = repeat(gen) take n + + /** * Return a stream of length at least n whose first elements are from the * given iterator, but if the iterator has fewer than n elements, then the * remaining elements are repeat(gen). @@ -388,9 +397,9 @@ /** * Indexes the result of groupBy. * <p> - * 0 1 2 3 4 5 6 7 8 9 - * [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] - * [] -> [] + // 0 1 2 3 4 5 6 7 8 9 + // [a,b,c,c,c,d,d,e,f,f] -> [[0],[1],[2,3,4],[5,6],[7],[8,9]] + // [] -> [] */ def indexGroups[a,b](xs: Seq[a])(f: a => b) = { val i = Iterator from 0 @@ -437,6 +446,9 @@ object Tree { abstract class Tree[a] { + /** + * Show the tree as a string. + */ // Leaf("a") -> // a // @@ -457,9 +469,34 @@ } r(this) mkString "\n" } + /** + * Access a node via the given index path. + */ + def get(path: Seq[Int]): a = (this, path) match { + case (Leaf(x), Seq() ) => x + case (Branch(ts), Seq(x, xs@_*)) => ts(x) get xs + case _ => throw new Exception("invalid path") + } + /** + * Flatten the leaves into a single stream. + */ + def flatten: Stream[a] = this match { + case Branch(ts) => Stream concat (ts map (_.flatten)) + case Leaf(x) => Stream cons (x, Stream empty) + } } case class Branch[a](ts: Seq[Tree[a]]) extends Tree[a] case class Leaf[a](x: a) extends Tree[a] + + /** + * Build a tree whose i-th level branch has a fanout of xs(i). + */ + def treeFromFanouts[a](gen: => a, fanouts: Seq[Int]): Tree[a] = + fanouts match { + case Seq() => Leaf(gen) + case Seq(fanout, rest@_*) => + Branch(replicate(fanout, treeFromFanouts(gen, rest)).toArray) + } } case class TreeNode[a](value: a, children: Seq[TreeNode[a]]) { @@ -611,4 +648,49 @@ // [1,3,5,6,7,8] odd -> ([1,3,5],[6,7,8]) // [] _ -> ([],[]) // + + def camelToLower(s: String, sep: String) = { + val xs = + for (c <- s) + yield if (c.isUpperCase) sep + c.toLowerCase else c + xs mkString "" + } + def camelToUnder(s: String) = camelToLower(s, "_") + def camelToHyphen(s: String) = camelToLower(s, "-") + + // TODO: this isn't really rot encoding + def rot(n: Int, s: String) = s map (_ + n toChar) mkString + + def untilNull[a](f: => a) = new Iterator[a] { + var upcoming = f + override def hasNext = upcoming != null + override def next = { + val emit = upcoming + upcoming = f + emit + } + } + + /** + * Returns the positive (unsigned int) modulo. 
+ */ + def mod(n: Int, m: Int) = { + val r = n % m + if (r < 0) r + m else r + } + + /** + * Returns pairs of elements. + * <p> + * <code> + * pairs([a,b,c,d]) == [(a,b),(c,d)] + * pairs([a,b,c,d,e]) == [(a,b),(c,d)] + * </code> + */ + def pairs[a](xs: Seq[a]): Stream[(a,a)] = { + xs match { + case Seq() => Stream empty + case Seq(a, b, rest @ _*) => Stream cons ((a,b), pairs(rest)) + } + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-10 03:54:36
|
Revision: 347 http://assorted.svn.sourceforge.net/assorted/?rev=347&view=rev Author: yangzhang Date: 2008-02-09 19:54:41 -0800 (Sat, 09 Feb 2008) Log Message: ----------- added hash-dist Added Paths: ----------- hash-dist/ hash-dist/trunk/ hash-dist/trunk/README hash-dist/trunk/src/ hash-dist/trunk/src/HashDist.scala hash-dist/trunk/src/Makefile Added: hash-dist/trunk/README =================================================================== --- hash-dist/trunk/README (rev 0) +++ hash-dist/trunk/README 2008-02-10 03:54:41 UTC (rev 347) @@ -0,0 +1,26 @@ +This is a simple utility for observing the distribution of a hash-function. For +instance, the arguments + + full djb2 2 stl 1000000 + +should be structurally interpreted as + + full + ("djb2", 2) + ("stl", 2) + +This constructs 2 top-level buckets, each leading to 1000000 second-level +buckets, where the first level is hashed by the DJB2 function, and the second +level is hashed by the STL function. + +Valid options are: + +- `cdf`: write a CDF of bucket sizes into the file `cdf`. +- `dump`: write the complete bucket sizes to stdout. +- `full`: produce both the CDF and the dump. + +Valid hash functions are: + +- djb2 +- java +- stl Added: hash-dist/trunk/src/HashDist.scala =================================================================== --- hash-dist/trunk/src/HashDist.scala (rev 0) +++ hash-dist/trunk/src/HashDist.scala 2008-02-10 03:54:41 UTC (rev 347) @@ -0,0 +1,93 @@ +import commons._ +import Collections._ +import Control._ +import Io._ +import Tree._ + +import scala.util._ + +object HashDist { + + /** + * From libstdc++ 4.1 __stl_hash_string. + */ + def hashStl(xs: Seq[Int]) = { + var h = 0 + for (x <- xs) h = 5 * h + x + h + } + + /** + * From Sun JDK6 String.hashCode. + */ + def hashJava(xs: Seq[Int]) = { + var h = 0 + for (x <- xs) h = 31 * h + x + h + } + + /** + * From http://www.cse.yorku.ca/~oz/hash.html. Not sure if this is correct, + * since Int is signed. + */ + def hashDjb2(xs: Seq[Int]) = { + var h = 5381 + for (x <- xs) h = ((h << 5) + h) + x + h + } + + /** + * Hash function. + */ + type Hasher = Seq[Int] => Int + + /** + * Hash function and number of buckets. + */ + type HashLevel = (Hasher, Int) + + /** + * Build a hierarchical hash table to observe the hash functions' + * distributions, then dump the distribution in gnuplot datafile format. + */ + def main(args: Array[String]) { + // Parse the arguments. + val hs: Array[HashLevel] = ( + for ((name, size) <- pairs(args)) yield { + val h = name match { + case "djb2" => hashDjb2 _ + case "java" => hashJava _ + case "stl" => hashStl _ + } + val n = size.toInt + (h,n) + } + ) toArray + val fanouts = hs map (_._2) + + // Construct the hierarchical hash table. + val tree = treeFromFanouts(new Array[Int](fanouts.last), fanouts.toStream.init) + + // For each line, hash its way down the tree. + for (line <- untilNull(Console.readLine)) { + val xs = line.toCharArray map (_ toInt) + val path = (for ((h,n) <- hs) yield mod(h(xs), n)).toArray + val counts: Array[Int] = tree get path.toStream.init + counts(path.last) += 1 + } + + // Display the final counts. + for ((xs,i) <- tree.flatten.zipWithIndex; (x,j) <- xs.zipWithIndex) + println(i + " " + j + " " + x) + + // Output the CDF in gnuplot datafile format. 
+ using (TextWriter("cdf")) { w => + val counts: Array[Int] = tree.flatten flatMap (x=>x) toArray; + Sorting quickSort counts + for ((count,i) <- counts zipWithIndex) { + w.println(i + " " + count) + } + } + } + +} Added: hash-dist/trunk/src/Makefile =================================================================== --- hash-dist/trunk/src/Makefile (rev 0) +++ hash-dist/trunk/src/Makefile 2008-02-10 03:54:41 UTC (rev 347) @@ -0,0 +1,10 @@ +all: out/HashDist.class + +out/HashDist.class: HashDist.scala $(COMMONS_SRCS) + mkdir -p out + fsc -d out $^ + +clean: + rm -rf out + +.PHONY: clean This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
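The hashDjb2 comment above questions whether the Scala port is faithful, since Scala's Int is signed; the reference formulation at cse.yorku.ca uses unsigned arithmetic, and HashDist accordingly reduces each hash with the positive mod helper from Collections rather than a plain %. A direct C++ rendering of the reference version, for comparison (the 1000000-bucket reduction mirrors the example in the README):

#include <iostream>
#include <string>

// Reference djb2 (http://www.cse.yorku.ca/~oz/hash.html) with unsigned
// arithmetic, so intermediate values never go negative.
inline unsigned long
hash_djb2(const std::string &s)
{
  unsigned long h = 5381;
  for (std::string::size_type i = 0; i < s.size(); i++)
    h = ((h << 5) + h) + (unsigned char) s[i];   // h * 33 + c
  return h;
}

int
main()
{
  std::string line;
  while (std::getline(std::cin, line))
    std::cout << hash_djb2(line) % 1000000 << '\n';   // bucket index
  return 0;
}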
From: <yan...@us...> - 2008-02-10 03:11:32
|
Revision: 346
http://assorted.svn.sourceforge.net/assorted/?rev=346&view=rev
Author: yangzhang
Date: 2008-02-09 19:11:37 -0800 (Sat, 09 Feb 2008)

Log Message:
-----------
added small bug

Added Paths:
-----------
    sandbox/trunk/src/scala/MissingParamTypeBug.scala

Added: sandbox/trunk/src/scala/MissingParamTypeBug.scala
===================================================================
--- sandbox/trunk/src/scala/MissingParamTypeBug.scala (rev 0)
+++ sandbox/trunk/src/scala/MissingParamTypeBug.scala 2008-02-10 03:11:37 UTC (rev 346)
@@ -0,0 +1,13 @@
+// Results in the following compile error:
+//
+// MissingParamTypeBug.scala:4: error: missing parameter type
+// for ((name,size) <- s toArray) println(name)
+// ^
+// one error found
+//
+object MissingParramTypeBug {
+  def main(args: Array[String]) {
+    val s = List("a","b").toStream
+    for ((name,size) <- s toArray) println(name)
+  }
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-09 19:39:45
|
Revision: 345
http://assorted.svn.sourceforge.net/assorted/?rev=345&view=rev
Author: yangzhang
Date: 2008-02-09 11:39:48 -0800 (Sat, 09 Feb 2008)

Log Message:
-----------
removed miss msgs

Modified Paths:
--------------
    hash-join/trunk/src/hashjoin.cc

Modified: hash-join/trunk/src/hashjoin.cc
===================================================================
--- hash-join/trunk/src/hashjoin.cc 2008-02-09 19:37:26 UTC (rev 344)
+++ hash-join/trunk/src/hashjoin.cc 2008-02-09 19:39:48 UTC (rev 345)
@@ -571,7 +571,6 @@
         hits++;
         join(title, name);
       } else {
-        cout << " MISS " << title << endl;
         misses++;
       }
       // End of a tuple? (Don't actually need this check, since the

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-09 19:37:20
|
Revision: 344 http://assorted.svn.sourceforge.net/assorted/?rev=344&view=rev Author: yangzhang Date: 2008-02-09 11:37:26 -0800 (Sat, 09 Feb 2008) Log Message: ----------- fixed some protection/docs Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-09 19:37:17 UTC (rev 343) +++ hash-join/trunk/src/hashjoin.cc 2008-02-09 19:37:26 UTC (rev 344) @@ -55,8 +55,6 @@ /** * Search in p for the nth instance of c and return the character past it. - * TODO figure out if there's a way to merge this and the above rather than - * maintaining two versions. (Related to Linus Torvalds' post on const?) */ inline char * strchrrep(char *p, char c, int n) @@ -223,12 +221,10 @@ { public: db(const char *path) : buf(load_file(path, buflen, ncpus)) {} - const bucket **partition(); /** - * This routine runs on each processor to hash-partition the data into local - * buckets. + * Run hash-partitioning phase on all processors. */ - virtual void partition1(unsigned int pid, bucket* bucket) = 0; + const bucket **partition(); virtual ~db() { delete [] buf; } /** * Push a tuple into one of the buckets. Which bucket is determined by the @@ -243,6 +239,11 @@ unsigned int push_bucket(char **heads, bucket *bs, const char *s, const char *p, size_t nbytes); protected: + /** + * This routine runs on each processor to hash-partition the data into local + * buckets. + */ + virtual void partition1(unsigned int pid, bucket* bucket) = 0; char *buf; size_t buflen; }; @@ -259,6 +260,7 @@ * Build the hash map in parallel. */ const hmap *build(const bucket **movbucs); +protected: /** * Each node runs this routine to construct its local hash map. */ @@ -278,6 +280,7 @@ * Probe the hash maps with tuples from the actor buckets. */ void probe(const hmap *hs, const bucket **actbucs, bool show_progress); +protected: /** * Each node runs this routine to probe into its local hash map using tuples * from actor buckets destined for that node. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-09 19:37:18
|
Revision: 343 http://assorted.svn.sourceforge.net/assorted/?rev=343&view=rev Author: yangzhang Date: 2008-02-09 11:37:17 -0800 (Sat, 09 Feb 2008) Log Message: ----------- fixed up for binutils list post Modified Paths: -------------- hash-join/trunk/src/Makefile Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-09 16:06:45 UTC (rev 342) +++ hash-join/trunk/src/Makefile 2008-02-09 19:37:17 UTC (rev 343) @@ -1,31 +1,39 @@ -CFLAGS := -Wall -lprofiler -lpthread -CXX = g++ $(CFLAGS) -o $@ $^ +TARGET := hashjoin +SRCS := hashjoin.cc +### begin common makefrag + +CFLAGS := -Wall -lpthread # -lprofiler + +CXX = g++ $(CFLAGS) -o $@ $^ + all: opt dbg pg -dbg: hashjoin-dbg -opt: hashjoin-opt -pg: hashjoin-pg +dbg: $(TARGET)-dbg +opt: $(TARGET)-opt +pg: $(TARGET)-pg -bench: hashjoin-opt - for i in 1 `seq 2 2 16` `seq 24 8 64` ; do \ - ./hashjoin-opt $$i $(MOVIEDATA)/movies.dat $(MOVIEDATA)/actresses.dat ; \ - done > log 2>&1 - -hashjoin-pg: hashjoin.cc +$(TARGET)-pg: $(SRCS) $(CXX) -g -pg -hashjoin-dbg: hashjoin.cc +$(TARGET)-dbg: $(SRCS) $(CXX) -g3 -fno-omit-frame-pointer -hashjoin-opt: hashjoin.cc +$(TARGET)-opt: $(SRCS) $(CXX) -g -O3 -fno-omit-frame-pointer doc: doc/html/index.html -doc/html/index.html: hashjoin.cc +doc/html/index.html: $(SRCS) Doxyfile doxygen clean: - rm -f hashjoin-opt hashjoin-dbg hashjoin-pg doc + rm -rf $(TARGET)-opt $(TARGET)-dbg $(TARGET)-pg doc .PHONY: clean dbg opt pg bench doc + +### end common makefrag + +bench: $(TARGET)-opt + for i in 1 `seq 2 2 16` `seq 24 8 64` ; do \ + ./$(TARGET)-opt $$i $(MOVIEDATA)/movies.dat $(MOVIEDATA)/actresses.dat ; \ + done > log 2>&1 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-09 16:06:40
|
Revision: 342 http://assorted.svn.sourceforge.net/assorted/?rev=342&view=rev Author: yangzhang Date: 2008-02-09 08:06:45 -0800 (Sat, 09 Feb 2008) Log Message: ----------- added documentation Modified Paths: -------------- hash-join/trunk/src/Makefile hash-join/trunk/src/hashjoin.cc Added Paths: ----------- hash-join/trunk/src/Doxyfile Added: hash-join/trunk/src/Doxyfile =================================================================== --- hash-join/trunk/src/Doxyfile (rev 0) +++ hash-join/trunk/src/Doxyfile 2008-02-09 16:06:45 UTC (rev 342) @@ -0,0 +1,1294 @@ +# Doxyfile 1.5.3 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file that +# follow. The default is UTF-8 which is also the encoding used for all text before +# the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into +# libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of +# possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = Hash-Join + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, +# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, +# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, +# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. 
+ +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) 
+ +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to +# include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. 
+ +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be extracted +# and appear in the documentation as a namespace called 'anonymous_namespace{file}', +# where file will be replaced with the base name of the file that contains the anonymous +# namespace. By default anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. 
+ +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. 
+ +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = NO + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. 
You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = . + +# This tag can be used to specify the character encoding of the source files that +# doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default +# input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding. +# See http://www.gnu.org/software/libiconv for the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the output. +# The symbol name can be a fully qualified name, a word, or if the wildcard * is used, +# a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. 
Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH +# then you must also enable this option. If you don't then doxygen will produce +# a warning and turn it on anyway + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = YES + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentstion. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. 
+ +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 
+ +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. 
If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. 
+ +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. 
Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to +# produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to +# specify the directory where the mscgen tool resides. If left empty the tool is assumed to +# be found in the default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. 
The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = YES + +# If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will +# generate a caller dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable caller graphs for selected +# functions only using the \callergraph command. + +CALLER_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. 
+ +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the number +# of direct children of the root node in a graph is already larger than +# MAX_DOT_GRAPH_NOTES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. 
+ +SEARCHENGINE = NO Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-09 06:02:28 UTC (rev 341) +++ hash-join/trunk/src/Makefile 2008-02-09 16:06:45 UTC (rev 342) @@ -20,7 +20,12 @@ hashjoin-opt: hashjoin.cc $(CXX) -g -O3 -fno-omit-frame-pointer +doc: doc/html/index.html + +doc/html/index.html: hashjoin.cc + doxygen + clean: - rm -f hashjoin-opt hashjoin-dbg hashjoin-pg + rm -f hashjoin-opt hashjoin-dbg hashjoin-pg doc -.PHONY: clean dbg opt pg bench +.PHONY: clean dbg opt pg bench doc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-09 06:02:28 UTC (rev 341) +++ hash-join/trunk/src/hashjoin.cc 2008-02-09 16:06:45 UTC (rev 342) @@ -230,6 +230,16 @@ */ virtual void partition1(unsigned int pid, bucket* bucket) = 0; virtual ~db() { delete [] buf; } + /** + * Push a tuple into one of the buckets. Which bucket is determined by the + * hash partitioning scheme. + * + * \param heads Array of "cursors" into each bucket. + * \param bs Array of buckets. + * \param s The string to hash. + * \param p The start of the tuple. + * \param nbytes The length of the tuple. + */ unsigned int push_bucket(char **heads, bucket *bs, const char *s, const char *p, size_t nbytes); protected: @@ -238,7 +248,7 @@ }; /** - * This is something which we must free. + * Movie database. */ class movdb : public db { @@ -256,6 +266,9 @@ void partition1(unsigned int pid, bucket* bucket); }; +/** + * Actress database. + */ class actdb : public db { public: @@ -362,11 +375,6 @@ return next == NULL ? p + strlen(p) : next + 2; } -/** - * \param s The string to hash. - * \param p The start of the tuple. - * \param nbytes The length of the tuple. - */ unsigned int db::push_bucket(char **heads, bucket *bs, const char *s, const char *p, size_t nbytes) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
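Since the Doxyfile above sets JAVADOC_AUTOBRIEF = YES, the first sentence of each /** ... */ block (up to the first period) is what Doxygen shows as the brief description in member listings, and the rest becomes the detailed description. A minimal sketch of a comment written with that in mind (the function and parameters below are hypothetical, not taken from hashjoin.cc):

    #include <cstddef>

    /**
     * Probe the hash table with one actress tuple.  This first sentence is
     * picked up as the brief description because JAVADOC_AUTOBRIEF is
     * enabled; everything after the first period becomes the detailed
     * description.
     *
     * \param s      The join key to hash (hypothetical parameter).
     * \param nbytes The length of the tuple (hypothetical parameter).
     */
    unsigned int probe_tuple(const char *s, std::size_t nbytes);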
From: <yan...@us...> - 2008-02-09 06:02:25
|
Revision: 341 http://assorted.svn.sourceforge.net/assorted/?rev=341&view=rev Author: yangzhang Date: 2008-02-08 22:02:28 -0800 (Fri, 08 Feb 2008) Log Message: ----------- added benchmarker to makefile Modified Paths: -------------- hash-join/trunk/src/Makefile Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-09 05:11:38 UTC (rev 340) +++ hash-join/trunk/src/Makefile 2008-02-09 06:02:28 UTC (rev 341) @@ -6,16 +6,21 @@ opt: hashjoin-opt pg: hashjoin-pg +bench: hashjoin-opt + for i in 1 `seq 2 2 16` `seq 24 8 64` ; do \ + ./hashjoin-opt $$i $(MOVIEDATA)/movies.dat $(MOVIEDATA)/actresses.dat ; \ + done > log 2>&1 + hashjoin-pg: hashjoin.cc $(CXX) -g -pg hashjoin-dbg: hashjoin.cc - $(CXX) -g3 + $(CXX) -g3 -fno-omit-frame-pointer hashjoin-opt: hashjoin.cc - $(CXX) -O3 + $(CXX) -g -O3 -fno-omit-frame-pointer clean: rm -f hashjoin-opt hashjoin-dbg hashjoin-pg -.PHONY: clean dbg opt +.PHONY: clean dbg opt pg bench This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
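For reference, `seq 2 2 16` expands to 2 4 6 8 10 12 14 16 and `seq 24 8 64` to 24 32 40 48 56 64, so this bench target runs hashjoin-opt at CPU counts of 1, 2, 4, 6, 8, 10, 12, 14, 16, 24, 32, 40, 48, 56, and 64, with the combined stdout and stderr of all runs redirected to a file named `log`.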
From: <yan...@us...> - 2008-02-09 05:11:51
|
Revision: 340 http://assorted.svn.sourceforge.net/assorted/?rev=340&view=rev Author: yangzhang Date: 2008-02-08 21:11:38 -0800 (Fri, 08 Feb 2008) Log Message: ----------- fixed some gcc-4.2 issues Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-07 18:43:54 UTC (rev 339) +++ hash-join/trunk/src/hashjoin.cc 2008-02-09 05:11:38 UTC (rev 340) @@ -116,7 +116,7 @@ * Look for a substring, but without null-termination conventions. */ inline char * -unsafe_strstr(char *p, char *q, char *lim) +unsafe_strstr(char *p, const char *q, const char *lim) { if (lim == 0) { while (true) { @@ -134,6 +134,15 @@ } /** + * Look for a substring, but without null-termination conventions. + */ +inline const char* +unsafe_strstr(const char *p, const char *q, const char *lim) +{ + return unsafe_strstr((char*) p, q, lim); +} + +/** * Load an entire file into buf and also give us the length of the buffer. * TODO this probably isn't very safe, since we're demoting an off_t to a * size_t. Is there a healthier approach? This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
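The gcc-4.2 issue here looks like a const-correctness one: the needle and limit become `const char *`, and a second overload accepts a `const char *` haystack by forwarding through a cast, so callers holding const pointers or string literals still compile. A minimal sketch of the same pattern, with a hypothetical `find()` standing in for `unsafe_strstr`:

    // A sketch of the overload pattern in r340; find() is a hypothetical
    // stand-in for unsafe_strstr and deliberately does nothing interesting.
    inline char *find(char *p, const char *q) { (void) q; return p; }

    // The const overload forwards to the non-const version through a cast,
    // mirroring the new unsafe_strstr overload.
    inline const char *find(const char *p, const char *q)
    {
        return find(const_cast<char *>(p), q);
    }

    int main()
    {
        char buf[] = "movies.dat";
        const char *cbuf = buf;
        find(buf, "dat");   // resolves to the non-const overload
        find(cbuf, "dat");  // compiles only because the const overload exists
        return 0;
    }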
From: <yan...@us...> - 2008-02-07 18:43:49
|
Revision: 339 http://assorted.svn.sourceforge.net/assorted/?rev=339&view=rev Author: yangzhang Date: 2008-02-07 10:43:54 -0800 (Thu, 07 Feb 2008) Log Message: ----------- oops Modified Paths: -------------- hash-join/trunk/src/Makefile Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-07 18:42:39 UTC (rev 338) +++ hash-join/trunk/src/Makefile 2008-02-07 18:43:54 UTC (rev 339) @@ -7,7 +7,7 @@ pg: hashjoin-pg hashjoin-pg: hashjoin.cc - $(CXX) -pg + $(CXX) -g -pg hashjoin-dbg: hashjoin.cc $(CXX) -g3 @@ -16,6 +16,6 @@ $(CXX) -O3 clean: - rm -f hashjoin-opt hashjoin-dbg + rm -f hashjoin-opt hashjoin-dbg hashjoin-pg .PHONY: clean dbg opt This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-07 18:42:34
|
Revision: 338 http://assorted.svn.sourceforge.net/assorted/?rev=338&view=rev Author: yangzhang Date: 2008-02-07 10:42:39 -0800 (Thu, 07 Feb 2008) Log Message: ----------- updated Makefile to produce multiple version of the executable Modified Paths: -------------- hash-join/trunk/src/Makefile Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-07 17:06:08 UTC (rev 337) +++ hash-join/trunk/src/Makefile 2008-02-07 18:42:39 UTC (rev 338) @@ -1,10 +1,21 @@ -all: hashjoin +CFLAGS := -Wall -lprofiler -lpthread +CXX = g++ $(CFLAGS) -o $@ $^ -hashjoin: hashjoin.cc - # g++ -g3 -Wall -o hashjoin hashjoin.cc -lprofiler -lpthread - g++ -O3 -Wall -o hashjoin hashjoin.cc -lprofiler -lpthread +all: opt dbg pg +dbg: hashjoin-dbg +opt: hashjoin-opt +pg: hashjoin-pg +hashjoin-pg: hashjoin.cc + $(CXX) -pg + +hashjoin-dbg: hashjoin.cc + $(CXX) -g3 + +hashjoin-opt: hashjoin.cc + $(CXX) -O3 + clean: - rm -f hashjoin + rm -f hashjoin-opt hashjoin-dbg -.PHONY: clean +.PHONY: clean dbg opt This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
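In the rewritten rules, `$@` and `$^` are GNU make automatic variables: `$@` expands to the target of the rule being run (hashjoin-opt, hashjoin-dbg, or hashjoin-pg) and `$^` to its full prerequisite list (here just hashjoin.cc), so the shared `CXX` definition names each binary after its target without repeating the file names in every recipe.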
From: <yan...@us...> - 2008-02-07 17:06:08
|
Revision: 337 http://assorted.svn.sourceforge.net/assorted/?rev=337&view=rev Author: yangzhang Date: 2008-02-07 09:06:08 -0800 (Thu, 07 Feb 2008) Log Message: ----------- added software recommendations Added Paths: ----------- personal-site/trunk/src/recommendations.txt Added: personal-site/trunk/src/recommendations.txt =================================================================== --- personal-site/trunk/src/recommendations.txt (rev 0) +++ personal-site/trunk/src/recommendations.txt 2008-02-07 17:06:08 UTC (rev 337) @@ -0,0 +1,52 @@ +Some random underdogs. + +- [toast]: Automated software installation with safe compartmentalization. Works + without hassle 90% of the time. Supports a variety of package types, e.g.: + autotools, plain prefix-dir, plain make, distutils, cabal, etc. + +[toast]: http://toastball.net/ + +- [pandoc]: [Markdown] done right. A sane markup (structured text) language + supporting a variety of input and output formats, including HTML, man, and + TeX. [AsciiDoc] is a close runner-up. [ReST] starts to become unreadable. + If only there were some work on making these markup language extensible and + flexible enough to be used in everything from [wikis] to docstrings. + +[AsciiDoc]: http://www.methods.co.nz/asciidoc/ +[Markdown]: http://daringfireball.net/projects/markdown/ +[ReST]: http://docutils.sourceforge.net/rst.html +[pandoc]: http://johnmacfarlane.net/pandoc/ +[wikis]: http://code.google.com/p/pandocwiki/ + +- [rubber]: a usable frontend for LaTeX and all its friends: BibTeX, image + converters, etc. Similar to but more modular than [latexmk]. + +[rubber]: http://www.pps.jussieu.fr/~beffara/soft/rubber/ +[latexmk]: http://www.phys.psu.edu/~collins/software/latexmk-jcc/ + +<!-- TODO ensure the following is formatted correctly. --> +- [Gobby]: A collaborative text editor. I always end up underestimating the + utility and power of collaborative editing. Examples of things I've used it + for: + - peer programming + - explaining things to multiple parties, using this as a whiteboard + - online meetings: everybody ends up communicating exclusively in the + document, and no more meeting minutes are necessary + If only the quirks could be removed from Emacs' displays on different + frames. Vim also has a "collaborative editing" as an item sponsors can vote + on. UIs of tomorrow should support multiple simultaneous user inputs. + +[Gobby]: http://gobby.0x539.de/trac/ + +- [Opera]: (Probably the most popular on this list.) Not FOSS, but for me web + browsing is too critical an application to settle for second-best, and Opera + is my favorite. And if I trusted it with my mail, M2 would also be very far + ahead as well (alas, it's too buggy). KHTML is probably the + closest-performing engine, but I've found it to be too crash-prone. + +[Opera]: http://www.opera.com/ + +- [gprof2dot]: A handy tool for visualizing the results of gprof, the Google + CPU profiler, python cProfile, and more. + +[gprof2dot]: http://code.google.com/p/jrfonseca/wiki/Gprof2Dot This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-07 02:07:12
|
Revision: 336 http://assorted.svn.sourceforge.net/assorted/?rev=336&view=rev Author: yangzhang Date: 2008-02-06 18:07:16 -0800 (Wed, 06 Feb 2008) Log Message: ----------- added readme Added Paths: ----------- hash-join/trunk/README Added: hash-join/trunk/README =================================================================== --- hash-join/trunk/README (rev 0) +++ hash-join/trunk/README 2008-02-07 02:07:16 UTC (rev 336) @@ -0,0 +1,14 @@ +This is a simple implementation of parallel hash joins. I'm using this as a +first step in studying the performance problems in multicore systems +programming. This implementation is tailored for a particular dataset, the IMDB +`movies.list` and `actresses.list` files, which may be found [here]. + +The `tools/` directory contains `DbPrep.scala`, which is a filter for the +`.list` files to prepare them to be more easily parsed by the hash join +application. + +The `tools/` directory also contains `LogProc.scala`, which processes stdout +concatenated from multiple runs of the program. This will produce the time and +speedup plots illustrating the scalability of the system. + +[here]: http://us.imdb.com/interfaces#plain This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
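Read alongside the neighboring commits, the intended workflow appears to be: run DbPrep over the raw IMDB `.list` files to produce `movies.dat` and `actresses.dat`, run the join binary over those files at a range of CPU counts (the bench target added in r341 invokes `./hashjoin-opt $i movies.dat actresses.dat` in a loop), and feed the concatenated stdout to LogProc to produce the time and speedup plots mentioned above.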
From: <yan...@us...> - 2008-02-07 01:37:32
|
Revision: 335 http://assorted.svn.sourceforge.net/assorted/?rev=335&view=rev Author: yangzhang Date: 2008-02-06 17:37:32 -0800 (Wed, 06 Feb 2008) Log Message: ----------- fixed int overflow (to a limited degree) Modified Paths: -------------- hash-join/trunk/src/Makefile hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/Makefile =================================================================== --- hash-join/trunk/src/Makefile 2008-02-07 01:35:37 UTC (rev 334) +++ hash-join/trunk/src/Makefile 2008-02-07 01:37:32 UTC (rev 335) @@ -1,7 +1,8 @@ all: hashjoin hashjoin: hashjoin.cc - g++ -g3 -Wall -o hashjoin hashjoin.cc -lprofiler -lpthread + # g++ -g3 -Wall -o hashjoin hashjoin.cc -lprofiler -lpthread + g++ -O3 -Wall -o hashjoin hashjoin.cc -lprofiler -lpthread clean: rm -f hashjoin Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-07 01:35:37 UTC (rev 334) +++ hash-join/trunk/src/hashjoin.cc 2008-02-07 01:37:32 UTC (rev 335) @@ -321,7 +321,8 @@ for (unsigned int i = 0; i < ncpus; i++) { buckets[i] = new bucket[ncpus]; for (unsigned int j = 0; j < ncpus; j++) { - int bucket_size = max(1000000UL,buflen / ncpus * 3); + // TODO dependency injection + size_t bucket_size = max(1000000UL,buflen / ncpus * 3); // Each bucket should be twice as large as it would be given uniform // distribution. This is just an initial size; extending can happen. buckets[i][j].bufs.push_back(new char[bucket_size]); @@ -362,7 +363,7 @@ { size_t h = __stl_hash_string(s); unsigned int bucket = h % (map_size * ncpus) / map_size; - int bucket_size = max(1000000UL,buflen / ncpus * 3); + size_t bucket_size = max(1000000UL,buflen / ncpus * 3); if (heads[bucket] + nbytes < bs[bucket].bufs.back() + bucket_size) { memcpy(heads[bucket], p, nbytes); heads[bucket] += nbytes; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
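The overflow being fixed is a narrowing one: `buflen / ncpus * 3` is computed in `unsigned long`/`size_t`, but the old code stored the result in an `int`, so large inputs wrapped the bucket size (the "limited degree" in the log message presumably refers to 32-bit builds, where `size_t` itself is 32 bits and the arithmetic can still wrap). A minimal sketch of the failure mode with illustrative numbers only, assuming a typical 64-bit Linux target:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    int main()
    {
        // Illustrative only: a 2 GB input split across 2 CPUs.
        std::size_t buflen = 2UL * 1024 * 1024 * 1024;
        unsigned int ncpus = 2;

        // Pre-r335: the unsigned long result is narrowed into an int and wraps
        // (implementation-defined, but -1073741824 on common targets).
        int bad = std::max(1000000UL, buflen / ncpus * 3);

        // Post-r335: keeping the value in size_t preserves it (3221225472).
        std::size_t good = std::max(1000000UL, buflen / ncpus * 3);

        std::cout << bad << " vs " << good << std::endl;
        return 0;
    }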
From: <yan...@us...> - 2008-02-07 01:35:34
|
Revision: 334 http://assorted.svn.sourceforge.net/assorted/?rev=334&view=rev Author: yangzhang Date: 2008-02-06 17:35:37 -0800 (Wed, 06 Feb 2008) Log Message: ----------- fixed graphs Modified Paths: -------------- hash-join/trunk/tools/LogProc.scala hash-join/trunk/tools/Makefile Modified: hash-join/trunk/tools/LogProc.scala =================================================================== --- hash-join/trunk/tools/LogProc.scala 2008-02-07 00:50:59 UTC (rev 333) +++ hash-join/trunk/tools/LogProc.scala 2008-02-07 01:35:37 UTC (rev 334) @@ -77,7 +77,7 @@ // Instruct gnuplot. def f(s:String) = { { - for ((field,_) <- map) yield ( + for ((field,_) <- plotData) yield ( "'" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'" ) } mkString ", " Modified: hash-join/trunk/tools/Makefile =================================================================== --- hash-join/trunk/tools/Makefile 2008-02-07 00:50:59 UTC (rev 333) +++ hash-join/trunk/tools/Makefile 2008-02-07 01:35:37 UTC (rev 334) @@ -16,7 +16,7 @@ scala -cp out DbPrep proc: out/LogProc.class - scala -cp out LogProc log + scala -cp out LogProc log-opt clean: rm -rf out This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-07 00:51:20
|
Revision: 333 http://assorted.svn.sourceforge.net/assorted/?rev=333&view=rev Author: yangzhang Date: 2008-02-06 16:50:59 -0800 (Wed, 06 Feb 2008) Log Message: ----------- checks out Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-07 00:46:50 UTC (rev 332) +++ hash-join/trunk/src/hashjoin.cc 2008-02-07 00:50:59 UTC (rev 333) @@ -192,7 +192,6 @@ public: ~bucket() { - // XXX check this for (size_t i = 0; i < bufs.size(); i++) { delete [] bufs[i]; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-07 00:47:03
|
Revision: 332 http://assorted.svn.sourceforge.net/assorted/?rev=332&view=rev Author: yangzhang Date: 2008-02-06 16:46:50 -0800 (Wed, 06 Feb 2008) Log Message: ----------- added prep and eval tools Added Paths: ----------- hash-join/trunk/tools/ hash-join/trunk/tools/DbPrep.scala hash-join/trunk/tools/LogProc.scala hash-join/trunk/tools/Makefile Added: hash-join/trunk/tools/DbPrep.scala =================================================================== --- hash-join/trunk/tools/DbPrep.scala (rev 0) +++ hash-join/trunk/tools/DbPrep.scala 2008-02-07 00:46:50 UTC (rev 332) @@ -0,0 +1,76 @@ +import commons.Control._ +import commons.Io._ +import java.util.regex._ +object DbPrep { + def extract(p: Pattern, s: String) = { + val m = p matcher s + m.find + (m group 1, m group 2) + } + def cleanTitle(line: String) = { + val t = line indexOf " " + if (t > 0) line take t else line + } + def main(args: Array[String]) { + val pMovie = Pattern compile """^([^\t]+)\t+(.*)$""" + val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$""" + val (doMovies, doActresses) = (true, true) + if (doMovies) { + using (TextReader("movies.list")) { r => + using (TextWriter("movies.dat")) { w => + var line = r.readLine + try { + var body = false + while (line != null) { + if (body && (line contains "----------------")) { + body = false + } + if (body && line != "") { + val (title, release) = extract(pMovie, line) + w print (title + "\0" + release + "\0\0") + } + if (!body && (line contains "=======")) { + body = true + } + line = r.readLine + } + } catch { + case e: Exception => { Console.err.println(line); throw e } + } + } + } + } + if (doActresses) { + using (TextReader("actresses.list")) { r => + using (TextWriter("actresses.dat")) { w => + var line = r.readLine + try { + var body = false + while (line != null) { + if (body && (line contains "----------------")) { + body = false + } + if (body && line != "") { + val (actress, title) = extract(pActress, line) + w print (actress + "\0" + cleanTitle(title) + "\0") + while (line != "") { + line = r.readLine.trim + if (line != "") { + w print (cleanTitle(title) + "\0") + } + } + w print "\0" + } + if (!body && ((line contains "\t") && (line startsWith "----") && (line endsWith "----"))) { + body = true + } + line = r.readLine + } + } catch { + case e: Exception => { Console.err.println(line); throw e } + } + } + } + } + } +} Added: hash-join/trunk/tools/LogProc.scala =================================================================== --- hash-join/trunk/tools/LogProc.scala (rev 0) +++ hash-join/trunk/tools/LogProc.scala 2008-02-07 00:46:50 UTC (rev 332) @@ -0,0 +1,98 @@ +import commons.Collections._ +import commons.Control._ +import commons.Io._ +// import commons.Plotting._ +import scala.collection.mutable._ + +object LogProc { + type FieldMap = Map[String,Int] + type MutFieldMap = HashMap[String,Int] + case class Stats( + ncpus: Int, + values: FieldMap + ) + val descriptors = Array( + ("movieLoading", "loading movies" ), + ("actressLoading", "loading actresses" ), + ("moviePartitioning", "hash-partitioning movies" ), + ("actressPartitioning", "hash-partitioning actresses" ), + ("movieBuilding", "building with movies" ), + ("actressProbing", "probing with actresses" ), + ("sum", "sum" ) + ) + val fieldNameToLabel = Map(descriptors: _*) + def fieldName(k: Int) = descriptors(k)._1 + def main(args: Array[String]) { + val lines = using (TextReader(args(0))) (_.readLines.toArray) + val map = new MutFieldMap + var ncpus = 0 + val stats = new ArrayBuffer[Stats] + var 
fieldIndex = Iterator from 0 + + // Parse logs into Stats. + for (line <- lines) { + if (line contains " cpus") { + // Include sum. + map("sum") = sum(map.values) + if (ncpus != 0) stats += Stats(ncpus, map.clone) + ncpus = line.split(" ")(1).toInt + fieldIndex = Iterator from 0 + map.clear + } else if (line contains "main time: ") { + map(fieldName(fieldIndex.next)) = line.split(" ").last.toInt + } + } + + // Build actual plot data. + val plotData = new HashMap[String,ArrayBuffer[Int]] { + override def default(k: String) = { + val buf = new ArrayBuffer[Int] + this(k) = buf + buf + } + } + val ncpuList = stats map (_.ncpus) + for (Stats(ncpus, map) <- stats) { + for (field <- map.keys) { + plotData(field) += map(field) + } + } + + // Produce the time and speedup .dats. + for ((field,times) <- plotData) { + val baseline = times(0).toDouble + println(field + ": " + times) + using (TextWriter(camelToHyphen(field) + "-time.dat")) { w => + for ((time,ncpus) <- times zip ncpuList) { + w.println(ncpus + " " + time) + } + } + using (TextWriter(camelToHyphen(field) + "-speedup.dat")) { w => + for ((time,ncpus) <- times map (baseline / _) zip ncpuList) { + w.println(ncpus + " " + time) + } + } + } + + // Instruct gnuplot. + def f(s:String) = { + { + for ((field,_) <- map) yield ( + "'" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'" + ) + } mkString ", " + } + run("gnuplot", """ + set terminal pdf + set xlabel 'number of threads' + + set output 'times.pdf' + set ylabel 'time (ms)' + plot """ + f("-time") + """ + + set output 'speedups.pdf' + set ylabel 'speedup (relative to 1 thread)' + plot """ + f("-speedup") + ) + } +} Added: hash-join/trunk/tools/Makefile =================================================================== --- hash-join/trunk/tools/Makefile (rev 0) +++ hash-join/trunk/tools/Makefile 2008-02-07 00:46:50 UTC (rev 332) @@ -0,0 +1,24 @@ +COMMONS_SRCS := $(wildcard commons/*.scala) +DBPREP_SRCS := DbPrep.scala $(COMMONS_SRCS) +LOGPREP_SRCS := LogProc.scala $(COMMONS_SRCS) + +all: out/DbPrep.class out/LogProc.class + +out/DbPrep.class: $(DBPREP_SRCS) + mkdir -p out + fsc -deprecation -d out $^ + +out/LogProc.class: $(LOGPREP_SRCS) + mkdir -p out + fsc -deprecation -d out $^ + +run: out/DbPrep.class + scala -cp out DbPrep + +proc: out/LogProc.class + scala -cp out LogProc log + +clean: + rm -rf out + +.PHONY: clean run This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
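For reference when reading the C++ side below: the record layout DbPrep.scala appears to emit is NUL-delimited fields with an empty field (a double NUL) terminating each record, i.e. title\0release\0\0 for movies and name\0title\0title\0...\0\0 for actresses. A small, self-contained walker over that layout; the function name is illustrative and this is independent of the actual hashjoin.cc parser, which works in place over the loaded file buffer.

    // Walks a buffer of NUL-delimited records of the form
    //   field\0field\0...\0\0
    // i.e. each record is terminated by an empty field (a double NUL). This
    // mirrors the .dat layout DbPrep.scala appears to emit; there are no
    // bounds checks beyond the end pointer, so it expects well-formed input.
    #include <cstring>
    #include <iostream>

    void walk_records(const char *p, const char *end) {
      while (p < end) {
        std::cout << "record:";
        while (*p != '\0') {            // an empty field ends the record
          std::cout << " [" << p << "]";
          p += std::strlen(p) + 1;      // skip this field and its NUL
        }
        std::cout << "\n";
        p++;                            // skip the empty field's NUL
      }
    }

    int main() {
      // Two movie-style records: "Alien" (1979) and "Heat" (1995).
      const char buf[] = "Alien\0" "1979\0" "\0" "Heat\0" "1995\0" "\0";
      walk_records(buf, buf + sizeof(buf) - 1);  // -1 drops the literal's own NUL
      return 0;
    }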
From: <yan...@us...> - 2008-02-06 23:44:17
|
Revision: 331 http://assorted.svn.sourceforge.net/assorted/?rev=331&view=rev Author: yangzhang Date: 2008-02-06 15:44:14 -0800 (Wed, 06 Feb 2008) Log Message: ----------- added initial ncpu logging Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-06 23:01:49 UTC (rev 330) +++ hash-join/trunk/src/hashjoin.cc 2008-02-06 23:44:14 UTC (rev 331) @@ -277,6 +277,8 @@ const char *movies = argv[2]; const char *actresses = argv[3]; + cout << "using " << ncpus << " cpus" << endl; + timer t("main time: "); // Load the data files. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-06 23:01:54
|
Revision: 330 http://assorted.svn.sourceforge.net/assorted/?rev=330&view=rev Author: yangzhang Date: 2008-02-06 15:01:49 -0800 (Wed, 06 Feb 2008) Log Message: ----------- cleaned up Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-06 17:29:57 UTC (rev 329) +++ hash-join/trunk/src/hashjoin.cc 2008-02-06 23:01:49 UTC (rev 330) @@ -1,3 +1,4 @@ +#include <memory> #include <cassert> #include <cstdio> #include <iostream> @@ -3,5 +4,4 @@ #include <exception> #include <vector> - #include <ext/hash_map> @@ -10,27 +10,38 @@ #include <sys/stat.h> #include <sys/time.h> #include <fcntl.h> - #include <pthread.h> #include "method_thread1.h" // -// c++ commons :: numa -// TODO document +// C++ Commons :: NUMA // using namespace std; -// TODO replace with a macro +// TODO: Figure out how to create an exception with a useful message. inline void -check(bool cond) +_check(bool cond, const char *msg, const char *file, int line) { if (!cond) { throw exception(); } } +#define check(cond) _check(cond, NULL, __FILE__, __LINE__) + +/** + * Similar to assert(), but is not conditionally compiled, so this is safe to + * use as a guard against expected failures (such as checking return codes). + */ +#define checkmsg(cond, msg) \ + bool b = cond; \ + if (!b) _check(b, (msg), __FILE__, __LINE__) + +/** + * Search in p for the nth instance of c and return the character past it. + */ inline const char * strchrrep(const char *p, char c, int n) { @@ -42,13 +53,23 @@ return p; } +/** + * Search in p for the nth instance of c and return the character past it. + * TODO figure out if there's a way to merge this and the above rather than + * maintaining two versions. (Related to Linus Torvalds' post on const?) + */ inline char * strchrrep(char *p, char c, int n) { return const_cast<char *>(strchrrep(const_cast<const char *>(p), c, n)); } -inline long long current_time_millis() { +/** + * Get the current time in milliseconds. + */ +inline long long +current_time_millis() +{ long long t; struct timeval tv; @@ -60,12 +81,16 @@ return t; } +/** + * Convenience class for performing wall-clock benchmarking. + */ class timer { public: timer(const string label) : label(label), start(current_time_millis()), last(start) {} - void print() { + void print() + { long long now = current_time_millis(); cout << label << now - last << endl; last = now; @@ -75,11 +100,47 @@ long long start, last; }; -void -load_file(const char *path, char *&buf, size_t & len, unsigned int ncpus) { +/** + * A functor that checks for string equality. Mainly useful as a template + * parameter to the hash data structures in STL extensions. + */ +struct eqstr +{ + bool operator()(const char* s1, const char* s2) const + { + return strcmp(s1, s2) == 0; + } +}; + +/** + * Look for a substring, but without null-termination conventions. + */ +inline char * +unsafe_strstr(char *p, char *q, char *lim) +{ + if (lim == 0) { + while (true) { + for (; !(*p == '\0' && *(p+1) == '\0'); p++); + return p; + } + } else { + check(p < lim); + while (true) { + for (; !(*p == '\0' && *(p+1) == '\0') && p < lim; p++); + if (p == lim) return NULL; + return p; + } + } +} + +/** + * Load an entire file into buf and also give us the length of the buffer. + * TODO this probably isn't very safe, since we're demoting an off_t to a + * size_t. Is there a healthier approach? 
+ */ +char * +load_file(const char *path, size_t & len, unsigned int ncpus) { struct stat sb; - // pthread_t tha[CPUS]; - // void *value; int fd; fd = open(path, 0); @@ -88,119 +149,122 @@ check(fstat(fd, &sb) == 0); check(sb.st_size <= 0xffffffff); - // TODO why don't i need (static) cast here? + // TODO Why don't we need (static) cast here? Isn't this a lossy cast? len = sb.st_size; - buf = new char[len + 1]; + char *buf = new char[len + 1]; check(buf); - // XXX use threads to pull data to the correct initial locations? -// #if CPUS > 1 + // TODO Use threads to pull data to the correct initial locations? size_t chunk_len = len / ncpus; for (unsigned int i = 0; i < ncpus; i++) { - // TODO review C++ cast rules int off = i *chunk_len; ssize_t status = pread(fd, buf + off, chunk_len, off); - // we read the whole chunk or hit the end + // We read the whole chunk or hit the end. size_t nread = static_cast<ssize_t>(status); check(status != -1 && (nread == chunk_len || off + nread == len)); -// tha[i] = method_thread1(this, &MapReduce::readin1, i *chunk_len, chunk_len); } -// for(i = 0; i < ncpus; i++) -// check(pthread_join(tha[i], &value) == 0); -// #else -// readin1(0, len); -// #endif - check(close(fd) == 0); buf[len] = '\0'; // don't let strcmp() run off the end + return buf; } // -// hashjoin +// Hash Join // using namespace std; using namespace __gnu_cxx; -struct eqstr -{ - bool operator()(const char* s1, const char* s2) const - { - return strcmp(s1, s2) == 0; - } -}; - -// TODO dependency injection +// TODO use dependency injection! unsigned int ncpus = 1; typedef hash_map<const char *, const void *, hash<const char *>, eqstr> hmap; const hmap::size_type map_size = 10000000; -class bucket { +/** + * Buckets are produced in the hash-partitioning phase. These are simple + * storage containers. + */ +class bucket +{ public: + ~bucket() + { + // XXX check this + for (size_t i = 0; i < bufs.size(); i++) { + delete [] bufs[i]; + } + } + /** + * The sizes of the bufs. Should always be the same length as bufs. + */ vector<size_t> sz; + /** + * The data that we hold. + */ vector<char *> bufs; }; -class db { +/** + * An abstract in-memory database that holds "tuples" in a contiguous buffer. + * The format/interpretation of the buffers is up to the subclasses. + */ +class db +{ public: - db(const char *path) { load_file(path, buf, buflen, ncpus); } + db(const char *path) : buf(load_file(path, buflen, ncpus)) {} const bucket **partition(); + /** + * This routine runs on each processor to hash-partition the data into local + * buckets. + */ virtual void partition1(unsigned int pid, bucket* bucket) = 0; - virtual ~db() {} + virtual ~db() { delete [] buf; } unsigned int push_bucket(char **heads, bucket *bs, const char *s, const char *p, size_t nbytes); protected: - // TODO smart pointer char *buf; size_t buflen; }; -class movdb : public db { +/** + * This is something which we must free. + */ +class movdb : public db +{ public: movdb(const char *path) : db(path) {} virtual ~movdb() {} + /** + * Build the hash map in parallel. + */ const hmap *build(const bucket **movbucs); + /** + * Each node runs this routine to construct its local hash map. + */ void build1(unsigned int pid, const bucket **movbucs, hmap *h); void partition1(unsigned int pid, bucket* bucket); }; -class actdb : public db { +class actdb : public db +{ public: actdb(const char *path) : db(path) {} virtual ~actdb() {} + /** + * Probe the hash maps with tuples from the actor buckets. 
+ */ void probe(const hmap *hs, const bucket **actbucs, bool show_progress); - void probe1(unsigned int pid, const hmap *hh, const bucket **actbucs); + /** + * Each node runs this routine to probe into its local hash map using tuples + * from actor buckets destined for that node. + */ + void probe1(unsigned int pid, const hmap *ph, const bucket **actbucs); void partition1(unsigned int pid, bucket* bucket); }; -// template <typename T> -// class bucket { -// public: -// bucket(int count) { -// ts = new (T*)[count]; -// } -// private: -// T *ts; -// } -// -// typedef vector<const char *> bucket; -// -// class hmap { -// public: -// hmap(int nbuckets) : nbuckets(nbuckets), nentries(0) { -// buckets = new bucket[nbuckets]; -// check(buckets); -// } -// hmap() : nbuckets(nbuckets_default); -// private: -// bucket *buckets; -// int nbuckets; -// int nentries; -// }; - int main(int argc, char *argv[]) { @@ -215,14 +279,18 @@ timer t("main time: "); + // Load the data files. + cout << "loading movies" << endl; - movdb mdb(movies); // "../movie-data/movies.dat" + movdb mdb(movies); t.print(); cout << "loading actresses" << endl; - actdb adb(actresses); // "../movie-data/mdactresses.dat" + actdb adb(actresses); t.print(); + // Hash-partition the data among the nodes. + cout << "hash-partitioning movies into per-core buckets" << endl; const bucket **movbucs = mdb.partition(); t.print(); @@ -231,6 +299,8 @@ const bucket **actbucs = adb.partition(); t.print(); + // Perform the hash-join. + cout << "building with movies" << endl; const hmap *hs = mdb.build(movbucs); t.print(); @@ -271,53 +341,10 @@ check(pthread_join(ts[i], &value) == 0); } - // // Now from the consumer - // for (int i = 0; i < ncpus; i++) { - // ts[i] = method_thread1( - // // XXX - // ); - // } return const_cast<const bucket**>(buckets); // TODO why is this cast needed? 
} -// XXX -//inline const char * -//unsafe_strstr(const char *p, const char *q, const char *lim) -//{ -// while (true) { -// if (lim > 0 && p >= lim) return NULL; -// p = strchr(p, '\0') + 1; -// if (lim > 0 && p >= lim) return NULL; -// if (*p == '\0') return p; -// } -//} - inline char * -unsafe_strstr(char *p, char *q, char *lim) -{ - if (lim == 0) { - while (true) { - for (; !(*p == '\0' && *(p+1) == '\0'); p++); - return p; - } - } else { - while (true) { - for (; !(*p == '\0' && *(p+1) == '\0') && p < lim; p++); - if (p == lim) return NULL; - return p; - } - } -} - -// inline char * -// unsafe_strstr(char *p, char *q, char *lim) -// { -// return const_cast<char *>(unsafe_strstr(const_cast<const char*>(p), -// const_cast<const char*>(q), -// const_cast<const char*>(lim))); -// } - -inline char * next_tuple(char *p) { char *next = unsafe_strstr(p, "\0\0", 0); @@ -334,16 +361,12 @@ { size_t h = __stl_hash_string(s); unsigned int bucket = h % (map_size * ncpus) / map_size; - //cout << s << " : " << bucket << endl; - //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus; int bucket_size = max(1000000UL,buflen / ncpus * 3); if (heads[bucket] + nbytes < bs[bucket].bufs.back() + bucket_size) { memcpy(heads[bucket], p, nbytes); heads[bucket] += nbytes; return -1; } else { - //cout << s << endl; - // cout << (uintptr_t)heads[bucket] << " " << nbytes << " " << (uintptr_t)bs[bucket].buf << " " << bucket_size << endl; bs[bucket].sz.back() = heads[bucket] - bs[bucket].bufs.back(); bs[bucket].bufs.push_back(new char[bucket_size]); check(bs[bucket].bufs.back()); @@ -369,22 +392,15 @@ for (unsigned int i = 0; i < ncpus; i++) { heads[i] = bs[i].bufs[0]; } - // Statistics (TODO dynamic allocation) - int counter = 0, mincount = INT_MAX; + int counter = 0; char *p = partstart, *end = partend; + // Iterate over the partitions. while (p < end) { - // cout << "remaining: " << end - p << " " << (uintptr_t) p << " ; " << ((int) *(p-1)) << " ; " << ((int) *(p)) << " ; " << ((int) *(p+1)) << endl; char *title = p; char *release = strchr(p, '\0') + 1; p = strchr(release, '\0') + 2; - // printf("%s (%d) / %s (%d) %d %d %d %d\n", title, strlen(title), release, strlen(release), *(p - 4), *(p - 3), *(p - 2), *(p - 1)); - // Copy this line into the correct local bucket. - if (-1 != push_bucket(heads, bs, title, title, p - title)) { - //cout << "FUCK " << heads[0] - bs[0].buf << " " << heads[1] - bs[1].buf << " " << p - title << endl; - //mincount = min(mincount, counter); - //if (mincount == counter) cout << "CRAP" << counter << endl; - //cout << "overflowed on: " << title << endl; - } + // Copy this tuple into the correct local bucket. + push_bucket(heads, bs, title, title, p - title); counter++; } // Record the written size of each bucket. @@ -411,37 +427,33 @@ for (unsigned int i = 0; i < ncpus; i++) { heads[i] = bs[i].bufs[0]; } + + // This is used for creating (name, title) tuples. (No tuple may exceed 1024 + // bytes.) char tmp[1024]; - // Statistics (TODO dynamic allocation) - int counter = 0, mincount = INT_MAX; + // Iterate over the partitions. char *p = partstart, *end = partend; + int counter = 0; while (p < end) { char *name = p; p = strchr(p, '\0') + 1; + // Fill in the first part of the tuple. strcpy(tmp, name); char *subtmp = tmp + strlen(name) + 1; - char *tuple_end = unsafe_strstr(p, "\0\0", end) + 2; while (true) { char *title = p; p = strchr(p, '\0') + 1; + // Fill in the second half of the tuple. 
strcpy(subtmp, title); size_t tmplen = subtmp + strlen(subtmp) + 2 - tmp; check(tmplen < 1024); tmp[tmplen-1] = '\0'; - // Copy this line into the correct local bucket. - //cout << "hashing " << title << endl; + // Copy the tuple into the correct local bucket. unsigned int bbb; - if (-1 != (bbb = push_bucket(heads, bs, title, tmp, tmplen))) { - //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus; - //int bucket_size = max(1000000UL,buflen / ncpus * 3); - //cout << "FUCK " << heads[0] - bs[0].buf << " " << bucket_size << " " << heads[1] - bs[1].buf << " " << p - title << endl; - ////mincount = min(mincount, counter); - ////if (mincount == counter) cout << "CRAP" << counter << endl; - //cout << "overflowed " << bbb << " on: " << name << endl; - } + bbb = push_bucket(heads, bs, title, tmp, tmplen); counter++; // End of tuple? @@ -456,6 +468,8 @@ for (unsigned int i = 0; i < ncpus; i++) { bs[i].sz.back() = heads[i] - bs[i].bufs.back(); } + + // TODO fix this log msg to sum up the sz's rather than just showing the last cout << "actress count " << counter << " nbytes " << bs[0].sz.back()<< endl; } @@ -475,9 +489,9 @@ } void -movdb::build1(unsigned int pid, const bucket **movbucs, hmap *hh) +movdb::build1(unsigned int pid, const bucket **movbucs, hmap *ph) { - hmap &h = *hh; + hmap &h = *ph; // Visit each bucket that's destined to us (visit each source). for (unsigned int i = 0; i < ncpus; i++) { char *p = movbucs[i][pid].bufs[0], @@ -489,18 +503,10 @@ // Insert into hash map. h[title] = release; } - //cout << "cpu " << pid << " src " << i << " cumulative h.size " << h.size() - //<< endl; } } void -join(const char *movie, const char *actress) -{ - // cout << "JOINED: " << movie << " WITH " << actress << endl; -} - -void actdb::probe(const hmap *hs, const bucket **actbucs, bool show_progress) { pthread_t ts[ncpus]; @@ -513,31 +519,41 @@ } } +/** + * Dummy function that is called to represent emitting a joined tuple. + */ +inline void +join(const char *movie, const char *actress) +{ + if (false) cout << "JOINED: " << movie << " WITH " << actress << endl; +} + void -actdb::probe1(unsigned int pid, const hmap *hh, const bucket **actbucs) +actdb::probe1(unsigned int pid, const hmap *ph, const bucket **actbucs) { - const hmap &h = *hh; + const hmap &h = *ph; int hits = 0, misses = 0; + // For each source bucket. for (unsigned int i = 0; i < ncpus; i++) { char *p = actbucs[i][pid].bufs[0], *end = actbucs[i][pid].bufs[0] + actbucs[i][pid].sz[0]; + // Iterate over the bucket. while (p < end) { char *name = p; p = strchr(p, '\0') + 1; while (true) { char *title = p; p = strchr(p, '\0') + 1; - //cout << "name " << name << " title: " << title << p - title << endl; - // Emit any joined tuple. + // Emit the joined tuple (if a join was possible). if (h.find(title) != h.end()) { - //cout << " HIT" << endl; hits++; join(title, name); } else { cout << " MISS " << title << endl; misses++; } - // End of tuple? + // End of a tuple? (Don't actually need this check, since the + // hash-partitioning "normalizes" the tuples from the actresses file.) if (*p == '\0') { p++; break; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
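One of the comments added in this cleanup documents eqstr, which matters because the tables are keyed on raw const char *: without a custom equality functor, hash_map would compare key pointers rather than string contents. A minimal, era-appropriate usage sketch of that pattern, using GCC's old <ext/hash_map> extension as the hmap typedef does (it is deprecated, and modern code would reach for std::unordered_map<std::string, ...> instead); the sample key and value are made up.

    // Why eqstr matters: with const char * keys, equality must compare string
    // contents, not pointer identity. Mirrors the hmap typedef in hashjoin.cc.
    // <ext/hash_map> is a deprecated GCC extension; modern code would use
    // std::unordered_map<std::string, ...> instead.
    #include <cstring>
    #include <iostream>
    #include <ext/hash_map>

    struct eqstr {
      bool operator()(const char *s1, const char *s2) const {
        return std::strcmp(s1, s2) == 0;
      }
    };

    typedef __gnu_cxx::hash_map<const char *, const char *,
                                __gnu_cxx::hash<const char *>, eqstr> hmap;

    int main() {
      hmap h;
      h["Alien"] = "1979";

      char probe[] = "Alien";  // same contents, different pointer
      std::cout << (h.find(probe) != h.end() ? "hit" : "miss") << std::endl;
      return 0;
    }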
From: <yan...@us...> - 2008-02-06 17:29:57
|
Revision: 329 http://assorted.svn.sourceforge.net/assorted/?rev=329&view=rev Author: yangzhang Date: 2008-02-06 09:29:57 -0800 (Wed, 06 Feb 2008) Log Message: ----------- fixed more bugs Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-06 17:09:53 UTC (rev 328) +++ hash-join/trunk/src/hashjoin.cc 2008-02-06 17:29:57 UTC (rev 329) @@ -377,6 +377,7 @@ char *title = p; char *release = strchr(p, '\0') + 1; p = strchr(release, '\0') + 2; + // printf("%s (%d) / %s (%d) %d %d %d %d\n", title, strlen(title), release, strlen(release), *(p - 4), *(p - 3), *(p - 2), *(p - 1)); // Copy this line into the correct local bucket. if (-1 != push_bucket(heads, bs, title, title, p - title)) { //cout << "FUCK " << heads[0] - bs[0].buf << " " << heads[1] - bs[1].buf << " " << p - title << endl; @@ -504,7 +505,7 @@ { pthread_t ts[ncpus]; for (unsigned int i = 0; i < ncpus; i++) { - ts[i] = method_thread1(this, &actdb::probe1, i, hs, actbucs); + ts[i] = method_thread1(this, &actdb::probe1, i, &hs[i], actbucs); } for (unsigned int i = 0; i < ncpus; i++) { void *value; @@ -533,7 +534,7 @@ hits++; join(title, name); } else { - //cout << " MISS" << endl; + cout << " MISS " << title << endl; misses++; } // End of tuple? This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
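The substantive change in this commit is passing &hs[i] rather than hs to each probe thread, so every worker probes the hash map built for its own partition instead of all of them dereferencing the base pointer and reading partition 0; the MISS logging added in the same commit presumably helped diagnose that. A stripped-down illustration of the per-thread-slot pattern with plain pthreads; the struct and names are illustrative, and the repo's method_thread1 helper is not used here.

    // Per-thread-slot pattern: each worker receives a pointer to its own slot
    // of a shared array, which is what passing &hs[i] (rather than hs) restores.
    #include <iostream>
    #include <pthread.h>
    #include <vector>

    struct slot {
      unsigned int id;
      long result;
    };

    void *worker(void *arg) {
      slot *s = static_cast<slot *>(arg);  // this thread's slot only
      s->result = static_cast<long>(s->id) * 100;
      return NULL;
    }

    int main() {
      const unsigned int ncpus = 4;
      std::vector<slot> slots(ncpus);
      std::vector<pthread_t> threads(ncpus);

      for (unsigned int i = 0; i < ncpus; i++) {
        slots[i].id = i;
        // Pass &slots[i], not the base pointer: each thread gets its own partition.
        pthread_create(&threads[i], NULL, worker, &slots[i]);
      }
      for (unsigned int i = 0; i < ncpus; i++)
        pthread_join(threads[i], NULL);

      for (unsigned int i = 0; i < ncpus; i++)
        std::cout << "slot " << i << " -> " << slots[i].result << std::endl;
      return 0;
    }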
From: <yan...@us...> - 2008-02-06 17:09:49
|
Revision: 328 http://assorted.svn.sourceforge.net/assorted/?rev=328&view=rev Author: yangzhang Date: 2008-02-06 09:09:53 -0800 (Wed, 06 Feb 2008) Log Message: ----------- fixed some bugs Modified Paths: -------------- hash-join/trunk/src/hashjoin.cc Modified: hash-join/trunk/src/hashjoin.cc =================================================================== --- hash-join/trunk/src/hashjoin.cc 2008-02-06 16:19:42 UTC (rev 327) +++ hash-join/trunk/src/hashjoin.cc 2008-02-06 17:09:53 UTC (rev 328) @@ -338,7 +338,7 @@ //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus; int bucket_size = max(1000000UL,buflen / ncpus * 3); if (heads[bucket] + nbytes < bs[bucket].bufs.back() + bucket_size) { - memcpy(heads[bucket], s, nbytes); + memcpy(heads[bucket], p, nbytes); heads[bucket] += nbytes; return -1; } else { @@ -390,7 +390,7 @@ for (unsigned int i = 0; i < ncpus; i++) { bs[i].sz.back() = heads[i] - bs[i].bufs.back(); } - cout << "movie count " << counter << " vs " << bs[0].sz.back()<< endl; + cout << "movie count " << counter << " nbytes " << bs[0].sz.back()<< endl; } void @@ -415,7 +415,7 @@ // Statistics (TODO dynamic allocation) int counter = 0, mincount = INT_MAX; char *p = partstart, *end = partend; - while (p < end - 999) { + while (p < end) { char *name = p; p = strchr(p, '\0') + 1; strcpy(tmp, name); @@ -426,12 +426,14 @@ p = strchr(p, '\0') + 1; strcpy(subtmp, title); - size_t strl = strlen(subtmp); + size_t tmplen = subtmp + strlen(subtmp) + 2 - tmp; + check(tmplen < 1024); + tmp[tmplen-1] = '\0'; // Copy this line into the correct local bucket. //cout << "hashing " << title << endl; unsigned int bbb; - if (-1 != (bbb = push_bucket(heads, bs, title, tmp, subtmp + strl + 1 - tmp))) { + if (-1 != (bbb = push_bucket(heads, bs, title, tmp, tmplen))) { //size_t bucket_size = max(1000000,buflen / ncpus * 2); //2 * buflen / ncpus; //int bucket_size = max(1000000UL,buflen / ncpus * 3); //cout << "FUCK " << heads[0] - bs[0].buf << " " << bucket_size << " " << heads[1] - bs[1].buf << " " << p - title << endl; @@ -453,7 +455,7 @@ for (unsigned int i = 0; i < ncpus; i++) { bs[i].sz.back() = heads[i] - bs[i].bufs.back(); } - cout << "actress count " << counter << " vs " << bs[0].sz.back()<< endl; + cout << "actress count " << counter << " nbytes " << bs[0].sz.back()<< endl; } const hmap * @@ -514,23 +516,24 @@ actdb::probe1(unsigned int pid, const hmap *hh, const bucket **actbucs) { const hmap &h = *hh; + int hits = 0, misses = 0; for (unsigned int i = 0; i < ncpus; i++) { char *p = actbucs[i][pid].bufs[0], *end = actbucs[i][pid].bufs[0] + actbucs[i][pid].sz[0]; - int hits = 0, misses = 0; while (p < end) { char *name = p; p = strchr(p, '\0') + 1; while (true) { char *title = p; p = strchr(p, '\0') + 1; - // cout << "name " << name << "title: " << title << p - title << endl; + //cout << "name " << name << " title: " << title << p - title << endl; // Emit any joined tuple. if (h.find(title) != h.end()) { //cout << " HIT" << endl; hits++; join(title, name); } else { + //cout << " MISS" << endl; misses++; } // End of tuple? @@ -540,9 +543,8 @@ } } } - //cout << "cpu " << pid << " src " << i << " hits " << hits << " misses " << - //misses << endl; } + cout << "cpu " << pid << " hits " << hits << " misses " << misses << endl; } // vim:et:sw=2:ts=2 This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |