[Assorted-commits] SF.net SVN: assorted: [401] numa-bench/trunk/src/malloc.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-13 07:58:08
|
Revision: 401
          http://assorted.svn.sourceforge.net/assorted/?rev=401&view=rev
Author: yangzhang
Date: 2008-02-12 23:58:13 -0800 (Tue, 12 Feb 2008)

Log Message:
-----------
beefed up the test

Modified Paths:
--------------
numa-bench/trunk/src/malloc.cc

Modified: numa-bench/trunk/src/malloc.cc
===================================================================
--- numa-bench/trunk/src/malloc.cc 2008-02-13 07:58:00 UTC (rev 400)
+++ numa-bench/trunk/src/malloc.cc 2008-02-13 07:58:13 UTC (rev 401)
@@ -1,7 +1,7 @@
 // Questions this program answers:
 //
 // - Does malloc tend to allocate locally?
-// - Yes. Times working from local node is lower.
+// - TODO!
 // - How much does working from another node affect throughput?
 // - A bit: 647x from local, 649x from neighbor, 651x from remote
 // - Is there difference from repeatedly fetching the same (large) area n times
@@ -21,6 +21,7 @@
 #include <cstdlib>
 #include <iostream>
+#include <iomanip>
 #include <sched.h>
@@ -35,77 +36,166 @@
 using namespace commons;
 using namespace std;
+struct config
+{
+  /**
+   * The number of cores to test. This is a parameter (rather than
+   * auto-detected) because it additionally serves to mean the number of cores
+   * we want to test in parallel. As this program evolves, these may be
+   * separated.
+   */
+  const int ncores;
+
+  /**
+   * Size in bytes of the buffer to chew.
+   */
+  const size_t size;
+
+  /**
+   * Number of repetitions to chew.
+   */
+  const int nreps;
+
+  /**
+   * Perform rand access, otherwise sequential scan.
+   */
+  const bool shuffle;
+
+  /**
+   * Chew in parallel, otherwise each core chews serially.
+   */
+  const bool par;
+
+  /**
+   * Pin thread i to core i, otherwise let the OS manage things.
+   */
+  const bool pin;
+
+  /**
+   * Chew my own memory, otherwise chew the given (shared) memory.
+   */
+  const bool local;
+
+  /**
+   * Do writes, otherwise just do reads.
+   */
+  const bool write;
+};
+
 /**
  * \param pp The start of the buffer to chew.
- * \param core Which core to pin our thread to.
- * \param size The size of the buffer.
- * \param nreps The number of times to chew through the buffer.
- * \param shuffle If false, sequentially chew through; otherwise, randomly
- * shuffle the indexes we chew through.
+ * \param cpu Which CPU to pin our thread to.
+ * \param config The experiment configuration parameters.
  */
 void*
-chew(void* pp, unsigned int core, size_t size, unsigned int nreps, bool shuffle)
+chew(void* pp, unsigned int cpu, const config & config, const char* label)
 {
-  int* p = (int*) pp;
-  const size_t count = size / sizeof(int);
-  pid_t pid = gettid();
+  int* p = (int*) (config.local ? malloc(config.size) : pp);
+  const size_t count = config.size / sizeof(int);
   timer t(": ");
-  // Pin this thread to core `core`.
-  cpu_set_t cs;
-  CPU_ZERO(&cs);
-  CPU_SET(core, &cs);
-  sched_setaffinity(pid, sizeof(cs), &cs);
+  // Pin this thread to cpu `cpu`.
+  if (config.pin) {
+    pin_thread(cpu);
+  }
-  // Write sequentially to the memory region.
-  if (shuffle) {
-    for (unsigned int c = 0; c < nreps; c++) {
-      for (size_t i = 0; i < count; i++) {
-        // NOTE: Using r as the index assumes that rand generates large-enough
-        // values.
-        int r = rand();
-        p[r % count] += r;
+  if (config.write) {
+    // Write to the region.
+    if (config.shuffle) {
+      // Random access into the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          // NOTE: Using r as the index assumes that rand generates large-enough
+          // values.
+          int r = rand();
+          p[r % count] += r;
+        }
       }
+    } else {
+      // Sequential scan through the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          p[i] += rand();
+        }
+      }
     }
   } else {
-    for (unsigned int c = 0; c < nreps; c++) {
-      for (size_t i = 0; i < count; i++) {
-        p[i] += rand();
+    // Only read from the region.
+    int sum = 0;
+    if (config.shuffle) {
+      // Random access into the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          // NOTE: Using r as the index assumes that rand generates large-enough
+          // values.
+          sum += p[rand() % count];
+        }
       }
+    } else {
+      // Sequential scan through the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          sum += p[i] + rand();
+        }
+      }
     }
+    cout << sum << endl;
   }
   // Print the elapsed time.
-  cout << core;
+  cout << label << cpu;
   t.print();
+
+  if (config.local) free(p);
+
   return NULL;
 }
 int
 main(int argc, char** argv)
 {
-  if (argc < 5) {
-    cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl;
+  // So that our global shared malloc takes place on the CPU 0's node.
+  pin_thread(0);
+
+  if (argc < 9) {
+    cerr << argv[0] <<
+      " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl;
     return 1;
   }
-  // Parse command-line arguments.
-  const int ncores = atoi(argv[1]);
-  const size_t size = atoi(argv[2]);
-  const int nreps = atoi(argv[3]);
-  const bool shuffle = atoi(argv[4]);
+  // Parse command-line arguments. TODO
+  const config config = {
+    atoi(argv[1]),
+    atoi(argv[2]),
+    atoi(argv[3]),
+    atoi(argv[4]),
+    atoi(argv[5]),
+    atoi(argv[6]),
+    atoi(argv[7]),
+    atoi(argv[8])
+  };
-  void *p = malloc(size);
+  checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough");
+  void *p = malloc(config.size);
+
   // Warmup.
-  cout << "warmup: ";
-  chew(p, 0, size, nreps, shuffle);
+  chew(p, 0, config, "warmup: ");
-  // Chew the memory area from each core.
-  for (int i = 0; i < ncores; i++) {
-    pthread_t t;
-    check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0);
-    check(pthread_join(t, NULL) == 0);
+  if (config.par) {
+    // Chew the memory area from each core in parallel (and also chew own).
+    pthread_t ts[config.ncores];
+    for (int i = 0; i < config.ncores; i++) {
+      ts[i] = spawn(bind(chew, p, i, ref(config), ""));
+    }
+    for (int i = 0; i < config.ncores; i++) {
+      check(pthread_join(ts[i], NULL) == 0);
+    }
+  } else {
+    // Chew the memory area from each core in sequence.
+    for (int i = 0; i < config.ncores; i++) {
+      chew(p, i, config, "");
+    }
   }
   free(p);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|