[Assorted-commits] SF.net SVN: assorted: [601] numa-bench/trunk/src
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-03-05 05:49:10
|
Revision: 601
          http://assorted.svn.sourceforge.net/assorted/?rev=601&view=rev
Author:   yangzhang
Date:     2008-03-04 21:49:10 -0800 (Tue, 04 Mar 2008)

Log Message:
-----------
updated chew

Modified Paths:
--------------
    numa-bench/trunk/src/build
    numa-bench/trunk/src/chew.bash
    numa-bench/trunk/src/chew.cc

Modified: numa-bench/trunk/src/build
===================================================================
--- numa-bench/trunk/src/build 2008-03-04 06:31:56 UTC (rev 600)
+++ numa-bench/trunk/src/build 2008-03-05 05:49:10 UTC (rev 601)
@@ -1,8 +1,7 @@
 chew:
   srcs: [chew.cc]
-  libs: [pthread]
+  libs: [pthread, numa]
 
 avail:
   srcs: [avail.cc]
   libs: [pthread]
-

Modified: numa-bench/trunk/src/chew.bash
===================================================================
--- numa-bench/trunk/src/chew.bash 2008-03-04 06:31:56 UTC (rev 600)
+++ numa-bench/trunk/src/chew.bash 2008-03-05 05:49:10 UTC (rev 601)
@@ -2,42 +2,67 @@
 
 set -o errexit -o nounset
 
-make -s chew-opt
+reps=3
 
-function run {
-  for i in {1..3}
-  do out/chew-opt "$@"
+make -s chew-dbg
+
+run() {
+  for i in {1..$reps}
+  do out/chew-dbg "$@"
   done
 }
 
+K=000 M=000000 G=000000000
 KB=000 MB=000000 GB=000000000
 
-# ncpus size nreps shuffle par pin local write cross rrnodes nnodes ncpus
+# ncpus size opcount nreps shuffle par pin local write cross rrnodes nnodes ncpus
 
-echo writes
-run 16 100$MB 1 0 0 1 0 1 0 1 4 16
-run 16 1000$MB 1 0 0 1 0 1 0 1 4 16
-run 16 100$MB 10 0 0 1 0 1 0 1 4 16
-run 16 100$MB 1 1 0 1 0 1 0 1 4 16
+if true ; then
 
-echo reads
-run 16 1000$MB 1 0 0 1 0 0 0 1 4 16
-run 16 100$MB 1 1 0 1 0 0 0 1 4 16
+#echo writes
+#run 16 100$MB 100$M 1 0 0 1 0 1 0 1 4 16
+#run 16 1000$MB 1000$M 1 0 0 1 0 1 0 1 4 16
+#run 16 100$MB 100$M 10 0 0 1 0 1 0 1 4 16
+#run 16 1000$GB 100$M 1 1 0 1 0 1 0 1 4 16
+#
+#echo reads
+#run 16 1000$MB 100$M 1 0 0 1 0 0 0 1 4 16
+#run 16 1000$MB 10$M 1 1 0 1 0 0 0 1 4 16
 
 for n in 1 2 4 8 12 16 ; do
   echo par
-  run $n 10$MB 1 0 1 1 0 0 0 1 4 16
-  run $n 10$MB 1 1 1 1 0 0 0 1 4 16
-  run $n 10$MB 1 0 1 1 1 0 0 1 4 16
-  run $n 10$MB 1 1 1 1 1 0 0 1 4 16
-  run $n 10$MB 1 0 1 1 0 1 0 1 4 16
-  run $n 10$MB 1 1 1 1 0 1 0 1 4 16
-  run $n 10$MB 1 0 1 1 1 1 0 1 4 16
-  run $n 10$MB 1 1 1 1 1 1 0 1 4 16
+  run $n 1000$MB 10$M 1 0 1 1 0 0 0 1 4 16
+  run $n 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+  run $n 1000$MB 10$M 1 0 1 1 1 0 0 1 4 16
+  run $n 1000$MB 10$M 1 1 1 1 1 0 0 1 4 16
+  run $n 1000$MB 10$M 1 0 1 1 0 1 0 1 4 16
+  run $n 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+  run $n 1000$MB 10$M 1 0 1 1 1 1 0 1 4 16
+  run $n 1000$MB 10$M 1 1 1 1 1 1 0 1 4 16
 
-  echo cross
-  run $n 10$MB 1 0 1 1 0 0 1 1 4 16
-  run $n 10$MB 1 1 1 1 0 0 1 1 4 16
-  run $n 10$MB 1 0 1 1 0 1 1 1 4 16
-  run $n 10$MB 1 1 1 1 0 1 1 1 4 16
+#  echo cross
+#  run $n 1000$MB 10$M 1 0 1 1 0 0 1 1 4 16
+#  run $n 1000$MB 10$M 1 1 1 1 0 0 1 1 4 16
+#  run $n 1000$MB 10$M 1 0 1 1 0 1 1 1 4 16
+#  run $n 1000$MB 10$M 1 1 1 1 0 1 1 1 4 16
 done
+
+else
+
+#run 1 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 2 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 3 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 4 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 8 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 12 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+#run 16 1000$MB 10$M 1 1 1 1 0 1 0 1 4 16
+
+run 1 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+#run 2 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+#run 3 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+#run 4 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+#run 8 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+#run 12 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+run 16 1000$MB 10$M 1 1 1 1 0 0 0 1 4 16
+
+fi

Modified: numa-bench/trunk/src/chew.cc
===================================================================
--- numa-bench/trunk/src/chew.cc 2008-03-04 06:31:56 UTC (rev 600)
+++ numa-bench/trunk/src/chew.cc 2008-03-05 05:49:10 UTC (rev 601)
@@ -3,6 +3,8 @@
 
 #include <sched.h>
 
+#include <numa.h>
+
 #include <boost/bind.hpp>
 
 #include <commons/check.h>
@@ -15,8 +17,12 @@
 using namespace commons;
 using namespace std;
 
-pthread_barrier_t cross_barrier;
+// TODO Make into command line flags?
+const bool debug = false, pretouch = false, do_warmup = false, use_numa = false;
+pthread_barrier_t cross_barrier, startup_barrier;
 pthread_mutex_t iomutex;
+void*** partitions;
+int global_sum;
 
 struct config
 {
@@ -31,6 +37,11 @@
   const size_t size;
 
   /**
+   * The number of operations.
+   */
+  const unsigned int opcount;
+
+  /**
    * Number of repetitions to chew.
    */
   const unsigned int nreps;
@@ -86,16 +97,25 @@
   const unsigned int ncpus;
 };
 
-void*** partitions;
-int global_sum;
+void *
+alloc(size_t sz)
+{
+  return use_numa ? numa_alloc_local(sz) : malloc(sz);
+}
 
+void
+dealloc(void *p, size_t sz)
+{
+  return use_numa ? numa_free(p, sz) : free(p);
+}
+
 /**
  * \param p The buffer to chew.
  * \param config The experiment configuration.
  * \param len Length of the buffer.
  */
 void
-chew1(void* pp, config config, size_t len)
+chew1(void* pp, config config, size_t len, unsigned int seed)
 {
   int* p = (int*) pp;
   const size_t count = len / sizeof(int);
@@ -104,14 +124,14 @@
   // TODO: see these with random numbers generated from a global (serial) rand
   // NOTE: Using rand as the index assumes that rand generates large-enough
   // values.
-  posix_rand rand(current_time_millis() ^ gettid());
+  posix_rand rand(current_time_millis() ^ gettid() ^ seed);
 
   if (config.write) {
     // Write to the region.
     if (config.shuffle) {
       // Random access into the memory region.
       for (unsigned int c = 0; c < config.nreps; c++) {
-        for (size_t i = 0; i < count; i++) {
+        for (size_t i = 0; i < config.opcount; i++) {
           int r = rand();
           sum += p[r % count] += r;
         }
@@ -119,7 +139,7 @@
     } else {
       // Sequential scan through the memory region.
       for (unsigned int c = 0; c < config.nreps; c++) {
-        for (size_t i = 0; i < count; i++) {
+        for (size_t i = 0; i < config.opcount; i++) {
           sum += p[i] += rand();
         }
       }
@@ -129,14 +149,14 @@
     if (config.shuffle) {
       // Random access into the memory region.
       for (unsigned int c = 0; c < config.nreps; c++) {
-        for (size_t i = 0; i < count; i++) {
+        for (size_t i = 0; i < config.opcount; i++) {
           sum += p[rand() % count];
         }
       }
     } else {
       // Sequential scan through the memory region.
       for (unsigned int c = 0; c < config.nreps; c++) {
-        for (size_t i = 0; i < count; i++) {
+        for (size_t i = 0; i < config.opcount; i++) {
           sum += p[i] + rand();
         }
       }
@@ -166,7 +186,29 @@
     pin_thread(cpu);
   }
 
-  void* p = config.local ? malloc(config.size) : pp;
+  void* p = config.local ? alloc(config.size) : pp;
+  if (pretouch) {
+    int *is = (int*) p;
+    for (size_t i = 0; i < config.size / sizeof(int); i++) {
+      is[i] = i;
+    }
+    int sum = 0;
+    for (size_t i = 0; i < config.size / sizeof(int); i++) {
+      sum += is[i];
+    }
+    global_sum += sum;
+  }
+  if (debug) {
+    check(pthread_mutex_lock(&iomutex) == 0);
+    cout << worker << " alloc " << p << endl;
+    check(pthread_mutex_unlock(&iomutex) == 0);
+  }
+  if (!warmup) {
+    int barrier_result = pthread_barrier_wait(&startup_barrier);
+    check(barrier_result == PTHREAD_BARRIER_SERIAL_THREAD ||
+        barrier_result == 0);
+  }
+  posix_rand rand(current_time_millis());
   timer t(": ");
 
   if (!warmup && config.cross) {
@@ -180,10 +222,10 @@
     // TODO: make this more interesting than just a sequential traversal over
     // the partitions.
     for (unsigned int i = 0; i < config.nworkers; i++) {
-      chew1(partitions[i][worker], config, len);
+      chew1(partitions[i][worker], config, len, rand());
     }
   } else {
-    chew1(p, config, config.size);
+    chew1(p, config, config.size, rand());
   }
 
   // Print the elapsed time and "result".
@@ -193,7 +235,7 @@
   t.print();
   check(pthread_mutex_unlock(&iomutex) == 0);
 
-  if (config.local) free(p);
+  if (config.local) dealloc(p, config.size);
 
   return NULL;
 }
@@ -206,8 +248,8 @@
 
   if (argc < 13) {
     cerr << argv[0] <<
-      " <nworkers> <size> <nreps> <shuffle> <par> <pin>"
-      " <local> <write> <rrnodes> <nnodes>" << endl;
+      " <nworkers> <size> <opcount> <nreps> <shuffle> <par> <pin>"
+      " <local> <write> <rrnodes> <nnodes> <ncpus>" << endl;
     return 1;
   }
 
@@ -224,12 +266,14 @@
     atoi(argv[ 9]),
     atoi(argv[10]),
     atoi(argv[11]),
-    atoi(argv[12])
+    atoi(argv[12]),
+    atoi(argv[13])
   };
 
   cout << "config:"
     << " nworkers " << config.nworkers
     << " size " << config.size
+    << " opcount " << config.opcount
     << " nreps " << config.nreps
     << " shuffle " << config.shuffle
     << " par " << config.par
@@ -242,6 +286,8 @@
     << " ncpus " << config.ncpus
     << endl;
 
+  check(config.shuffle || config.opcount <= config.size / sizeof(int));
+
   void *p = malloc(config.size);
   check(p != NULL);
 
@@ -254,12 +300,13 @@
   }
 
   // Warmup.
-  chew(p, 0, config, true);
+  if (do_warmup) chew(p, 0, config, true);
 
   if (config.par) {
     // Chew the memory area from each core in parallel (and also chew own).
     pthread_t ts[config.nworkers];
     check(0 == pthread_barrier_init(&cross_barrier, NULL, config.nworkers));
+    check(0 == pthread_barrier_init(&startup_barrier, NULL, config.nworkers));
     for (unsigned int i = 0; i < config.nworkers; i++) {
       ts[i] = spawn(bind(chew, p, i, ref(config), false));
     }
@@ -267,6 +314,7 @@
       check(pthread_join(ts[i], NULL) == 0);
     }
     check(0 == pthread_barrier_destroy(&cross_barrier));
+    check(0 == pthread_barrier_destroy(&startup_barrier));
   } else {
     // Chew the memory area from each core in sequence.
     for (unsigned int i = 0; i < config.nworkers; i++) {
@@ -278,6 +326,8 @@
 
   ofstream trash("/dev/null");
   trash << "result: " << global_sum << endl;
+  cout << "result: " << global_sum << endl;
+
   check(pthread_mutex_destroy(&iomutex) == 0);
 
   return 0;

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|