Thread: [Assorted-commits] SF.net SVN: assorted: [365] numa-bench/trunk/src/malloc.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-11 04:57:26
|
Revision: 365 http://assorted.svn.sourceforge.net/assorted/?rev=365&view=rev Author: yangzhang Date: 2008-02-10 20:57:31 -0800 (Sun, 10 Feb 2008) Log Message: ----------- added cpu pinning to malloc Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-11 04:57:12 UTC (rev 364) +++ numa-bench/trunk/src/malloc.cc 2008-02-11 04:57:31 UTC (rev 365) @@ -5,17 +5,21 @@ #include <sched.h> +#include <boost/bind.hpp> + #include <commons/check.h> #include <commons/threads.h> #include <commons/time.h> +#include <commons/boost/threads.h> +using namespace boost; using namespace commons; using namespace std; const size_t size = 10000000; void* -chew(void* pp) +chew(void* pp, int core) { char* p = (char*) pp; const int reps = 100; @@ -25,17 +29,16 @@ // Pin this thread to the right processor. cpu_set_t cs; CPU_ZERO(&cs); - CPU_SET(1, &cs); + CPU_SET(core, &cs); sched_setaffinity(pid, sizeof(cs), &cs); - // TODO: try shuffling indexes for (int c = 0; c < reps; c++) { for (size_t i = 0; i < size; i++) { p[i] = i; } } - // Print the elapsed time; + // Print the elapsed time. cout << pid; t.print(); return NULL; @@ -53,21 +56,23 @@ void *p = malloc(size); // warmup - chew(p); + chew(p, 0); pthread_t ts[n]; // start thread on each core for (int i = 0; i < n; i++) { - check(pthread_create(&ts[i], NULL, chew, p) == 0); + pthread_t t; + check((t = spawn(bind(chew, p, i))) != 0); + check(pthread_join(t, NULL) == 0); } - waitall(ts, n); + // waitall(ts, n); return 0; // THRASH // spawn workers for (int i = 0; i < n; i++) { - check(pthread_create(&ts[i], NULL, chew, p) == 0); + check((ts[i] = spawn(bind(chew, p, i))) == 0); } waitall(ts, n); return 0; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-12 01:48:02
|
Revision: 386 http://assorted.svn.sourceforge.net/assorted/?rev=386&view=rev Author: yangzhang Date: 2008-02-11 17:47:59 -0800 (Mon, 11 Feb 2008) Log Message: ----------- nice malloc test Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-12 01:22:31 UTC (rev 385) +++ numa-bench/trunk/src/malloc.cc 2008-02-12 01:47:59 UTC (rev 386) @@ -1,5 +1,22 @@ -// Does malloc tend to allocate locally? +// Questions this program answers: +// +// - Does malloc tend to allocate locally? +// - Yes. Times working from local node is lower. +// - How much does working from another node affect throughput? +// - A bit: 647x from local, 649x from neighbor, 651x from remote +// - Is there difference from repeatedly fetching the same (large) area n times +// vs. fetching an area n times larger? +// - No. The times are identical for 1GB*1 and 100MB*10. +// - How much difference is there between sequential scan and random access? +// - Huge difference. Also magnifies the locality effects more. +// - 1700 from local, 1990 from one neighbor, 2020 from another neighbor, +// and 2310 from remote. +// - Can we observe prefetching's effects? (Random access but chew the full +// cache line of data.) +// - TODO! +// TODO: use real shuffling? or is rand ok? + #include <cstdlib> #include <iostream> @@ -16,30 +33,44 @@ using namespace commons; using namespace std; -const size_t size = 10000000; - +/** + * \param pp The start of the buffer to chew. + * \param core Which core to pin our thread to. + * \param size The size of the buffer. + * \param nreps The number of times to chew through the buffer. + * \param shuffle If false, sequentially chew through; otherwise, randomly + * shuffle the indexes we chew through. + */ void* -chew(void* pp, int core) +chew(void* pp, int core, size_t size, int nreps, bool shuffle) { char* p = (char*) pp; - const int reps = 100; pid_t pid = gettid(); timer t(": "); - // Pin this thread to the right processor. + // Pin this thread to core `core`. cpu_set_t cs; CPU_ZERO(&cs); CPU_SET(core, &cs); sched_setaffinity(pid, sizeof(cs), &cs); - for (int c = 0; c < reps; c++) { - for (size_t i = 0; i < size; i++) { - p[i] = i; + // Write sequentially to the memory region. + if (shuffle) { + for (int c = 0; c < nreps; c++) { + for (size_t i = 0; i < size; i++) { + p[rand() % size] = i; + } } + } else { + for (int c = 0; c < nreps; c++) { + for (size_t i = 0; i < size; i++) { + p[i] = i; + } + } } // Print the elapsed time. - cout << pid; + cout << core; t.print(); return NULL; } @@ -47,33 +78,31 @@ int main(int argc, char** argv) { - if (argc < 2) { - cerr << "malloc <nthreads>" << endl; + if (argc < 5) { + cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl; return 1; } - const int n = atoi(argv[1]); + // Parse command-line arguments. + const int ncores = atoi(argv[1]); + const size_t size = atoi(argv[2]); + const int nreps = atoi(argv[3]); + const bool shuffle = atoi(argv[4]); + void *p = malloc(size); - // warmup - chew(p, 0); - pthread_t ts[n]; + // Warmup. + cout << "warmup: "; + chew(p, 0, size, nreps, shuffle); - // start thread on each core - for (int i = 0; i < n; i++) { + // Chew the memory area from each core. + for (int i = 0; i < ncores; i++) { pthread_t t; - check((t = spawn(bind(chew, p, i))) != 0); + check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0); check(pthread_join(t, NULL) == 0); } - // waitall(ts, n); - return 0; - // THRASH + free(p); - // spawn workers - for (int i = 0; i < n; i++) { - check((ts[i] = spawn(bind(chew, p, i))) == 0); - } - waitall(ts, n); return 0; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 03:26:39
|
Revision: 398 http://assorted.svn.sourceforge.net/assorted/?rev=398&view=rev Author: yangzhang Date: 2008-02-12 19:26:37 -0800 (Tue, 12 Feb 2008) Log Message: ----------- calling rand() in seq to make the test more fair; writing by ints not chars; added some notes Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-12 22:15:13 UTC (rev 397) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 03:26:37 UTC (rev 398) @@ -11,6 +11,8 @@ // - Huge difference. Also magnifies the locality effects more. // - 1700 from local, 1990 from one neighbor, 2020 from another neighbor, // and 2310 from remote. +// - What's the difference between reading and writing? +// - TODO! // - Can we observe prefetching's effects? (Random access but chew the full // cache line of data.) // - TODO! @@ -42,9 +44,10 @@ * shuffle the indexes we chew through. */ void* -chew(void* pp, int core, size_t size, int nreps, bool shuffle) +chew(void* pp, unsigned int core, size_t size, unsigned int nreps, bool shuffle) { - char* p = (char*) pp; + int* p = (int*) pp; + const size_t count = size / sizeof(int); pid_t pid = gettid(); timer t(": "); @@ -56,15 +59,18 @@ // Write sequentially to the memory region. if (shuffle) { - for (int c = 0; c < nreps; c++) { - for (size_t i = 0; i < size; i++) { - p[rand() % size] = i; + for (unsigned int c = 0; c < nreps; c++) { + for (size_t i = 0; i < count; i++) { + // NOTE: Using r as the index assumes that rand generates large-enough + // values. + int r = rand(); + p[r % count] += r; } } } else { - for (int c = 0; c < nreps; c++) { - for (size_t i = 0; i < size; i++) { - p[i] = i; + for (unsigned int c = 0; c < nreps; c++) { + for (size_t i = 0; i < count; i++) { + p[i] += rand(); } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 07:58:08
|
Revision: 401 http://assorted.svn.sourceforge.net/assorted/?rev=401&view=rev Author: yangzhang Date: 2008-02-12 23:58:13 -0800 (Tue, 12 Feb 2008) Log Message: ----------- beefed up the test Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 07:58:00 UTC (rev 400) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 07:58:13 UTC (rev 401) @@ -1,7 +1,7 @@ // Questions this program answers: // // - Does malloc tend to allocate locally? -// - Yes. Times working from local node is lower. +// - TODO! // - How much does working from another node affect throughput? // - A bit: 647x from local, 649x from neighbor, 651x from remote // - Is there difference from repeatedly fetching the same (large) area n times @@ -21,6 +21,7 @@ #include <cstdlib> #include <iostream> +#include <iomanip> #include <sched.h> @@ -35,77 +36,166 @@ using namespace commons; using namespace std; +struct config +{ + /** + * The number of cores to test. This is a parameter (rather than + * auto-detected) because it additionally serves to mean the number of cores + * we want to test in parallel. As this program evolves, these may be + * separated. + */ + const int ncores; + + /** + * Size in bytes of the buffer to chew. + */ + const size_t size; + + /** + * Number of repetitions to chew. + */ + const int nreps; + + /** + * Perform rand access, otherwise sequential scan. + */ + const bool shuffle; + + /** + * Chew in parallel, otherwise each core chews serially. + */ + const bool par; + + /** + * Pin thread i to core i, otherwise let the OS manage things. + */ + const bool pin; + + /** + * Chew my own memory, otherwise chew the given (shared) memory. + */ + const bool local; + + /** + * Do writes, otherwise just do reads. + */ + const bool write; +}; + /** * \param pp The start of the buffer to chew. - * \param core Which core to pin our thread to. - * \param size The size of the buffer. - * \param nreps The number of times to chew through the buffer. - * \param shuffle If false, sequentially chew through; otherwise, randomly - * shuffle the indexes we chew through. + * \param cpu Which CPU to pin our thread to. + * \param config The experiment configuration parameters. */ void* -chew(void* pp, unsigned int core, size_t size, unsigned int nreps, bool shuffle) +chew(void* pp, unsigned int cpu, const config & config, const char* label) { - int* p = (int*) pp; - const size_t count = size / sizeof(int); - pid_t pid = gettid(); + int* p = (int*) (config.local ? malloc(config.size) : pp); + const size_t count = config.size / sizeof(int); timer t(": "); - // Pin this thread to core `core`. - cpu_set_t cs; - CPU_ZERO(&cs); - CPU_SET(core, &cs); - sched_setaffinity(pid, sizeof(cs), &cs); + // Pin this thread to cpu `cpu`. + if (config.pin) { + pin_thread(cpu); + } - // Write sequentially to the memory region. - if (shuffle) { - for (unsigned int c = 0; c < nreps; c++) { - for (size_t i = 0; i < count; i++) { - // NOTE: Using r as the index assumes that rand generates large-enough - // values. - int r = rand(); - p[r % count] += r; + if (config.write) { + // Write to the region. + if (config.shuffle) { + // Random access into the memory region. + for (unsigned int c = 0; c < config.nreps; c++) { + for (size_t i = 0; i < count; i++) { + // NOTE: Using r as the index assumes that rand generates large-enough + // values. + int r = rand(); + p[r % count] += r; + } } + } else { + // Sequential scan through the memory region. + for (unsigned int c = 0; c < config.nreps; c++) { + for (size_t i = 0; i < count; i++) { + p[i] += rand(); + } + } } } else { - for (unsigned int c = 0; c < nreps; c++) { - for (size_t i = 0; i < count; i++) { - p[i] += rand(); + // Only read from the region. + int sum = 0; + if (config.shuffle) { + // Random access into the memory region. + for (unsigned int c = 0; c < config.nreps; c++) { + for (size_t i = 0; i < count; i++) { + // NOTE: Using r as the index assumes that rand generates large-enough + // values. + sum += p[rand() % count]; + } } + } else { + // Sequential scan through the memory region. + for (unsigned int c = 0; c < config.nreps; c++) { + for (size_t i = 0; i < count; i++) { + sum += p[i] + rand(); + } + } } + cout << sum << endl; } // Print the elapsed time. - cout << core; + cout << label << cpu; t.print(); + + if (config.local) free(p); + return NULL; } int main(int argc, char** argv) { - if (argc < 5) { - cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl; + // So that our global shared malloc takes place on the CPU 0's node. + pin_thread(0); + + if (argc < 9) { + cerr << argv[0] << + " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl; return 1; } - // Parse command-line arguments. - const int ncores = atoi(argv[1]); - const size_t size = atoi(argv[2]); - const int nreps = atoi(argv[3]); - const bool shuffle = atoi(argv[4]); + // Parse command-line arguments. TODO + const config config = { + atoi(argv[1]), + atoi(argv[2]), + atoi(argv[3]), + atoi(argv[4]), + atoi(argv[5]), + atoi(argv[6]), + atoi(argv[7]), + atoi(argv[8]) + }; - void *p = malloc(size); + checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough"); + void *p = malloc(config.size); + // Warmup. - cout << "warmup: "; - chew(p, 0, size, nreps, shuffle); + chew(p, 0, config, "warmup: "); - // Chew the memory area from each core. - for (int i = 0; i < ncores; i++) { - pthread_t t; - check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0); - check(pthread_join(t, NULL) == 0); + if (config.par) { + // Chew the memory area from each core in parallel (and also chew own). + pthread_t ts[config.ncores]; + for (int i = 0; i < config.ncores; i++) { + ts[i] = spawn(bind(chew, p, i, ref(config), "")); + } + for (int i = 0; i < config.ncores; i++) { + check(pthread_join(ts[i], NULL) == 0); + } + } else { + // Chew the memory area from each core in sequence. + for (int i = 0; i < config.ncores; i++) { + chew(p, i, config, ""); + } } free(p); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 07:59:08
|
Revision: 403 http://assorted.svn.sourceforge.net/assorted/?rev=403&view=rev Author: yangzhang Date: 2008-02-12 23:59:10 -0800 (Tue, 12 Feb 2008) Log Message: ----------- tweak Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 07:58:40 UTC (rev 402) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 07:59:10 UTC (rev 403) @@ -21,7 +21,6 @@ #include <cstdlib> #include <iostream> -#include <iomanip> #include <sched.h> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 08:14:07
|
Revision: 404 http://assorted.svn.sourceforge.net/assorted/?rev=404&view=rev Author: yangzhang Date: 2008-02-13 00:14:10 -0800 (Wed, 13 Feb 2008) Log Message: ----------- added config logging; added result (sum) printing Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 07:59:10 UTC (rev 403) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 08:14:10 UTC (rev 404) @@ -98,6 +98,7 @@ pin_thread(cpu); } + int sum = 0; if (config.write) { // Write to the region. if (config.shuffle) { @@ -107,20 +108,19 @@ // NOTE: Using r as the index assumes that rand generates large-enough // values. int r = rand(); - p[r % count] += r; + sum += p[r % count] += r; } } } else { // Sequential scan through the memory region. for (unsigned int c = 0; c < config.nreps; c++) { for (size_t i = 0; i < count; i++) { - p[i] += rand(); + sum += p[i] += rand(); } } } } else { // Only read from the region. - int sum = 0; if (config.shuffle) { // Random access into the memory region. for (unsigned int c = 0; c < config.nreps; c++) { @@ -138,12 +138,12 @@ } } } - cout << sum << endl; } - // Print the elapsed time. + // Print the elapsed time and "result". cout << label << cpu; t.print(); + cout << "result: " << sum; if (config.local) free(p); @@ -174,6 +174,16 @@ atoi(argv[8]) }; + cout << "config:" + << " ncores " << config.ncores + << " size " << config.size + << " nreps " << config.nreps + << " shuffle " << config.shuffle + << " par " << config.par + << " pin " << config.pin + << " local " << config.local + << " write " << config.write << endl; + checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough"); void *p = malloc(config.size); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 17:53:35
|
Revision: 407 http://assorted.svn.sourceforge.net/assorted/?rev=407&view=rev Author: yangzhang Date: 2008-02-13 09:53:30 -0800 (Wed, 13 Feb 2008) Log Message: ----------- added cross-comm Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 17:53:16 UTC (rev 406) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 17:53:30 UTC (rev 407) @@ -20,6 +20,7 @@ // TODO: use real shuffling? or is rand ok? #include <cstdlib> +#include <fstream> #include <iostream> #include <sched.h> @@ -35,6 +36,8 @@ using namespace commons; using namespace std; +pthread_barrier_t cross_barrier; + struct config { /** @@ -79,25 +82,27 @@ * Do writes, otherwise just do reads. */ const bool write; + + /** + * Test cross-communication (use partitions), otherwise use either the + * global/local buffer. + */ + const bool cross; }; +void*** partitions; +int global_sum; + /** - * \param pp The start of the buffer to chew. - * \param cpu Which CPU to pin our thread to. - * \param config The experiment configuration parameters. + * \param p The buffer to chew. + * \param config The experiment configuration. + * \param len Length of the buffer. */ -void* -chew(void* pp, unsigned int cpu, const config & config, const char* label) +void +chew1(void* pp, config config, size_t len) { - int* p = (int*) (config.local ? malloc(config.size) : pp); - const size_t count = config.size / sizeof(int); - timer t(": "); - - // Pin this thread to cpu `cpu`. - if (config.pin) { - pin_thread(cpu); - } - + int* p = (int*) pp; + const size_t count = len / sizeof(int); int sum = 0; if (config.write) { // Write to the region. @@ -139,11 +144,44 @@ } } } + global_sum += sum; +} +/** + * \param pp The start of the buffer to chew. + * \param cpu Which CPU to pin our thread to. + * \param config The experiment configuration parameters. + * \param label Prefix for the elapsed time output. + */ +void* +chew(void* pp, unsigned int cpu, const config & config, bool warmup) +{ + // Pin this thread to cpu `cpu`. + if (config.pin) { + pin_thread(cpu); + } + + void* p = config.local ? malloc(config.size) : pp; + timer t(": "); + + if (!warmup && config.cross) { + size_t len = config.size / config.ncores; + for (int i = 0; i < config.ncores; i++) { + partitions[cpu][i] = new char[len]; + } + int barrier_result = pthread_barrier_wait(&cross_barrier); + check(barrier_result == PTHREAD_BARRIER_SERIAL_THREAD || barrier_result == 0); + for (int i = 0; i < config.ncores; i++) { + chew1(partitions[i][cpu], config, len); + } + } else { + chew1(p, config, config.size); + } + // Print the elapsed time and "result". - cout << label << cpu; + if (warmup) cout << "warmup: " << endl; + cout << cpu; t.print(); - cout << "result: " << sum; if (config.local) free(p); @@ -156,7 +194,7 @@ // So that our global shared malloc takes place on the CPU 0's node. pin_thread(0); - if (argc < 9) { + if (argc < 10) { cerr << argv[0] << " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl; return 1; @@ -171,7 +209,8 @@ atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), - atoi(argv[8]) + atoi(argv[8]), + atoi(argv[9]) }; cout << "config:" @@ -182,24 +221,34 @@ << " par " << config.par << " pin " << config.pin << " local " << config.local - << " write " << config.write << endl; + << " write " << config.write + << " cross " << config.cross << endl; checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough"); void *p = malloc(config.size); + check(p != NULL); + if (config.cross) { + partitions = new void**[config.ncores]; + for (unsigned int i = 0; i < config.ncores; i++) + partitions[i] = new void*[config.ncores]; + } + // Warmup. - chew(p, 0, config, "warmup: "); + chew(p, 0, config, true); if (config.par) { // Chew the memory area from each core in parallel (and also chew own). pthread_t ts[config.ncores]; + check(0 == pthread_barrier_init(&cross_barrier, NULL, config.ncores)); for (int i = 0; i < config.ncores; i++) { - ts[i] = spawn(bind(chew, p, i, ref(config), "")); + ts[i] = spawn(bind(chew, p, i, ref(config), false)); } for (int i = 0; i < config.ncores; i++) { check(pthread_join(ts[i], NULL) == 0); } + check(0 == pthread_barrier_destroy(&cross_barrier)); } else { // Chew the memory area from each core in sequence. for (int i = 0; i < config.ncores; i++) { @@ -208,6 +257,8 @@ } free(p); + ofstream trash("/dev/null"); + trash << "result: " << global_sum << endl; return 0; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-13 18:04:06
|
Revision: 409 http://assorted.svn.sourceforge.net/assorted/?rev=409&view=rev Author: yangzhang Date: 2008-02-13 10:03:22 -0800 (Wed, 13 Feb 2008) Log Message: ----------- fixed warmup messages Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 18:03:09 UTC (rev 408) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 18:03:22 UTC (rev 409) @@ -179,7 +179,7 @@ } // Print the elapsed time and "result". - if (warmup) cout << "warmup: " << endl; + if (warmup) cout << "warmup: "; cout << cpu; t.print(); @@ -252,7 +252,7 @@ } else { // Chew the memory area from each core in sequence. for (int i = 0; i < config.ncores; i++) { - chew(p, i, config, ""); + chew(p, i, config, false); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-15 01:44:38
|
Revision: 419 http://assorted.svn.sourceforge.net/assorted/?rev=419&view=rev Author: yangzhang Date: 2008-02-14 17:44:42 -0800 (Thu, 14 Feb 2008) Log Message: ----------- cleaned up content Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-15 01:44:23 UTC (rev 418) +++ numa-bench/trunk/src/malloc.cc 2008-02-15 01:44:42 UTC (rev 419) @@ -1,24 +1,3 @@ -// Questions this program answers: -// -// - Does malloc tend to allocate locally? -// - TODO! -// - How much does working from another node affect throughput? -// - A bit: 647x from local, 649x from neighbor, 651x from remote -// - Is there difference from repeatedly fetching the same (large) area n times -// vs. fetching an area n times larger? -// - No. The times are identical for 1GB*1 and 100MB*10. -// - How much difference is there between sequential scan and random access? -// - Huge difference. Also magnifies the locality effects more. -// - 1700 from local, 1990 from one neighbor, 2020 from another neighbor, -// and 2310 from remote. -// - What's the difference between reading and writing? -// - TODO! -// - Can we observe prefetching's effects? (Random access but chew the full -// cache line of data.) -// - TODO! - -// TODO: use real shuffling? or is rand ok? - #include <cstdlib> #include <fstream> #include <iostream> @@ -171,6 +150,8 @@ } int barrier_result = pthread_barrier_wait(&cross_barrier); check(barrier_result == PTHREAD_BARRIER_SERIAL_THREAD || barrier_result == 0); + // TODO: make this more interesting than just a sequential traversal over + // the partitions. for (int i = 0; i < config.ncores; i++) { chew1(partitions[i][cpu], config, len); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-26 19:41:20
|
Revision: 512 http://assorted.svn.sourceforge.net/assorted/?rev=512&view=rev Author: yangzhang Date: 2008-02-26 11:41:17 -0800 (Tue, 26 Feb 2008) Log Message: ----------- added custom rng Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-26 19:41:06 UTC (rev 511) +++ numa-bench/trunk/src/malloc.cc 2008-02-26 19:41:17 UTC (rev 512) @@ -1,4 +1,3 @@ -#include <cstdlib> #include <fstream> #include <iostream> @@ -7,6 +6,7 @@ #include <boost/bind.hpp> #include <commons/check.h> +#include <commons/rand.h> #include <commons/threads.h> #include <commons/time.h> #include <commons/boost/threads.h> @@ -83,6 +83,7 @@ int* p = (int*) pp; const size_t count = len / sizeof(int); int sum = 0; + posix_rand rand(current_time_millis() ^ gettid()); if (config.write) { // Write to the region. if (config.shuffle) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |