[Assorted-commits] SF.net SVN: assorted: [386] numa-bench/trunk/src/malloc.cc
From: <yan...@us...> - 2008-02-12 01:48:02
Revision: 386
          http://assorted.svn.sourceforge.net/assorted/?rev=386&view=rev
Author:   yangzhang
Date:     2008-02-11 17:47:59 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
nice malloc test

Modified Paths:
--------------
    numa-bench/trunk/src/malloc.cc

Modified: numa-bench/trunk/src/malloc.cc
===================================================================
--- numa-bench/trunk/src/malloc.cc	2008-02-12 01:22:31 UTC (rev 385)
+++ numa-bench/trunk/src/malloc.cc	2008-02-12 01:47:59 UTC (rev 386)
@@ -1,5 +1,22 @@
-// Does malloc tend to allocate locally?
+// Questions this program answers:
+//
+// - Does malloc tend to allocate locally?
+//   - Yes. Times when working from the local node are lower.
+// - How much does working from another node affect throughput?
+//   - A bit: 647x from local, 649x from neighbor, 651x from remote.
+// - Is there a difference between repeatedly fetching the same (large) area n
+//   times and fetching an area n times larger?
+//   - No. The times are identical for 1GB*1 and 100MB*10.
+// - How much difference is there between sequential scan and random access?
+//   - Huge difference. It also magnifies the locality effects.
+//   - 1700 from local, 1990 from one neighbor, 2020 from another neighbor,
+//     and 2310 from remote.
+// - Can we observe prefetching's effects? (Random access, but chew the full
+//   cache line of data.)
+//   - TODO!
+// TODO: use real shuffling? or is rand ok?
+
 #include <cstdlib>
 #include <iostream>
@@ -16,30 +33,44 @@
 using namespace commons;
 using namespace std;
 
-const size_t size = 10000000;
-
+/**
+ * \param pp The start of the buffer to chew.
+ * \param core Which core to pin our thread to.
+ * \param size The size of the buffer.
+ * \param nreps The number of times to chew through the buffer.
+ * \param shuffle If false, chew through sequentially; otherwise, randomly
+ *        shuffle the indexes we chew through.
+ */
 void*
-chew(void* pp, int core)
+chew(void* pp, int core, size_t size, int nreps, bool shuffle)
 {
   char* p = (char*) pp;
-  const int reps = 100;
   pid_t pid = gettid();
   timer t(": ");
 
-  // Pin this thread to the right processor.
+  // Pin this thread to core `core`.
   cpu_set_t cs;
   CPU_ZERO(&cs);
   CPU_SET(core, &cs);
   sched_setaffinity(pid, sizeof(cs), &cs);
 
-  for (int c = 0; c < reps; c++) {
-    for (size_t i = 0; i < size; i++) {
-      p[i] = i;
+  // Write through the memory region, in random or sequential order.
+  if (shuffle) {
+    for (int c = 0; c < nreps; c++) {
+      for (size_t i = 0; i < size; i++) {
+        p[rand() % size] = i;
+      }
     }
+  } else {
+    for (int c = 0; c < nreps; c++) {
+      for (size_t i = 0; i < size; i++) {
+        p[i] = i;
+      }
+    }
   }
 
   // Print the elapsed time.
-  cout << pid;
+  cout << core;
   t.print();
   return NULL;
 }
@@ -47,33 +78,31 @@
 int
 main(int argc, char** argv)
 {
-  if (argc < 2) {
-    cerr << "malloc <nthreads>" << endl;
+  if (argc < 5) {
+    cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl;
     return 1;
   }
 
-  const int n = atoi(argv[1]);
+  // Parse command-line arguments.
+  const int ncores = atoi(argv[1]);
+  const size_t size = atoi(argv[2]);
+  const int nreps = atoi(argv[3]);
+  const bool shuffle = atoi(argv[4]);
+
   void *p = malloc(size);
 
-  // warmup
-  chew(p, 0);
-  pthread_t ts[n];
+  // Warmup.
+  cout << "warmup: ";
+  chew(p, 0, size, nreps, shuffle);
 
-  // start thread on each core
+  // Chew the memory area from each core.
+  for (int i = 0; i < ncores; i++) {
     pthread_t t;
-    check((t = spawn(bind(chew, p, i))) != 0);
+    check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0);
     check(pthread_join(t, NULL) == 0);
   }
-  // waitall(ts, n);
-  return 0;
 
-  // THRASH
+  free(p);
 
-  // spawn workers
-  for (int i = 0; i < n; i++) {
-    check((ts[i] = spawn(bind(chew, p, i))) == 0);
-  }
-  waitall(ts, n);
 
   return 0;
 }
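
The committed shuffle branch picks indexes with rand() % size, and the comment block leaves "use real shuffling? or is rand ok?" as a TODO. The sketch below shows one way "real shuffling" could look, assuming a pre-C++11 toolchain (hence std::random_shuffle rather than std::shuffle): build a permutation of the indexes once, shuffle it, then walk it, so every byte is touched exactly once per rep, matching the work done by the sequential scan. The chew_shuffled name and the tiny demo main are illustrative only and not part of this commit.

// Illustrative sketch only -- not part of the commit.
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <vector>

// Hypothetical variant of the shuffle branch in chew().
void chew_shuffled(char* p, size_t size, int nreps)
{
  // Build the identity permutation of [0, size) and shuffle it once.
  std::vector<size_t> idx(size);
  for (size_t i = 0; i < size; i++)
    idx[i] = i;
  std::random_shuffle(idx.begin(), idx.end());  // pre-C++11; std::shuffle today

  // Chew the buffer in the shuffled order: every byte is hit once per rep.
  for (int c = 0; c < nreps; c++)
    for (size_t i = 0; i < size; i++)
      p[idx[i]] = (char) i;
}

int main()
{
  const size_t size = 1 << 20;           // 1MB demo buffer
  char* p = (char*) malloc(size);
  chew_shuffled(p, size, 10);
  free(p);
  return 0;
}

One caveat with this approach: the permutation costs 8 bytes per element, so for the gigabyte-sized runs mentioned in the comments a shuffle at cache-line or page granularity would be the more practical variant.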
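The "does malloc tend to allocate locally?" question comes down to first-touch placement: by default Linux places a page on the node of the CPU that first writes it, which is likely why the warmup chew from core 0 matters here. Below is a hedged sketch of making the placement explicit with libnuma instead of relying on malloc plus first-touch. numa_available, numa_alloc_onnode, and numa_free are real libnuma calls, but the program, the file name in the build comment, and the choice of node 0 are assumptions for illustration, not part of this commit.

// Illustrative sketch only -- not part of the commit.
// Build with: g++ numa_place.cc -lnuma   (file name is hypothetical)
#include <numa.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

int main()
{
  if (numa_available() < 0) {
    std::fprintf(stderr, "NUMA is not available on this system\n");
    return 1;
  }

  const size_t size = 100 * 1024 * 1024;  // 100MB, one of the sizes tested above
  const int node = 0;                      // hypothetical target node

  // Pages of this buffer come from `node`, no matter which core touches them first.
  char* p = (char*) numa_alloc_onnode(size, node);
  if (p == NULL)
    return 1;

  // Chew once, as the benchmark's warmup does.
  for (size_t i = 0; i < size; i++)
    p[i] = (char) i;

  numa_free(p, size);
  return 0;
}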