[Assorted-commits] SF.net SVN: assorted: [407] numa-bench/trunk/src/malloc.cc
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-13 17:53:35
|
Revision: 407 http://assorted.svn.sourceforge.net/assorted/?rev=407&view=rev Author: yangzhang Date: 2008-02-13 09:53:30 -0800 (Wed, 13 Feb 2008) Log Message: ----------- added cross-comm Modified Paths: -------------- numa-bench/trunk/src/malloc.cc Modified: numa-bench/trunk/src/malloc.cc =================================================================== --- numa-bench/trunk/src/malloc.cc 2008-02-13 17:53:16 UTC (rev 406) +++ numa-bench/trunk/src/malloc.cc 2008-02-13 17:53:30 UTC (rev 407) @@ -20,6 +20,7 @@ // TODO: use real shuffling? or is rand ok? #include <cstdlib> +#include <fstream> #include <iostream> #include <sched.h> @@ -35,6 +36,8 @@ using namespace commons; using namespace std; +pthread_barrier_t cross_barrier; + struct config { /** @@ -79,25 +82,27 @@ * Do writes, otherwise just do reads. */ const bool write; + + /** + * Test cross-communication (use partitions), otherwise use either the + * global/local buffer. + */ + const bool cross; }; +void*** partitions; +int global_sum; + /** - * \param pp The start of the buffer to chew. - * \param cpu Which CPU to pin our thread to. - * \param config The experiment configuration parameters. + * \param p The buffer to chew. + * \param config The experiment configuration. + * \param len Length of the buffer. */ -void* -chew(void* pp, unsigned int cpu, const config & config, const char* label) +void +chew1(void* pp, config config, size_t len) { - int* p = (int*) (config.local ? malloc(config.size) : pp); - const size_t count = config.size / sizeof(int); - timer t(": "); - - // Pin this thread to cpu `cpu`. - if (config.pin) { - pin_thread(cpu); - } - + int* p = (int*) pp; + const size_t count = len / sizeof(int); int sum = 0; if (config.write) { // Write to the region. @@ -139,11 +144,44 @@ } } } + global_sum += sum; +} +/** + * \param pp The start of the buffer to chew. + * \param cpu Which CPU to pin our thread to. + * \param config The experiment configuration parameters. + * \param label Prefix for the elapsed time output. + */ +void* +chew(void* pp, unsigned int cpu, const config & config, bool warmup) +{ + // Pin this thread to cpu `cpu`. + if (config.pin) { + pin_thread(cpu); + } + + void* p = config.local ? malloc(config.size) : pp; + timer t(": "); + + if (!warmup && config.cross) { + size_t len = config.size / config.ncores; + for (int i = 0; i < config.ncores; i++) { + partitions[cpu][i] = new char[len]; + } + int barrier_result = pthread_barrier_wait(&cross_barrier); + check(barrier_result == PTHREAD_BARRIER_SERIAL_THREAD || barrier_result == 0); + for (int i = 0; i < config.ncores; i++) { + chew1(partitions[i][cpu], config, len); + } + } else { + chew1(p, config, config.size); + } + // Print the elapsed time and "result". - cout << label << cpu; + if (warmup) cout << "warmup: " << endl; + cout << cpu; t.print(); - cout << "result: " << sum; if (config.local) free(p); @@ -156,7 +194,7 @@ // So that our global shared malloc takes place on the CPU 0's node. pin_thread(0); - if (argc < 9) { + if (argc < 10) { cerr << argv[0] << " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl; return 1; @@ -171,7 +209,8 @@ atoi(argv[5]), atoi(argv[6]), atoi(argv[7]), - atoi(argv[8]) + atoi(argv[8]), + atoi(argv[9]) }; cout << "config:" @@ -182,24 +221,34 @@ << " par " << config.par << " pin " << config.pin << " local " << config.local - << " write " << config.write << endl; + << " write " << config.write + << " cross " << config.cross << endl; checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough"); void *p = malloc(config.size); + check(p != NULL); + if (config.cross) { + partitions = new void**[config.ncores]; + for (unsigned int i = 0; i < config.ncores; i++) + partitions[i] = new void*[config.ncores]; + } + // Warmup. - chew(p, 0, config, "warmup: "); + chew(p, 0, config, true); if (config.par) { // Chew the memory area from each core in parallel (and also chew own). pthread_t ts[config.ncores]; + check(0 == pthread_barrier_init(&cross_barrier, NULL, config.ncores)); for (int i = 0; i < config.ncores; i++) { - ts[i] = spawn(bind(chew, p, i, ref(config), "")); + ts[i] = spawn(bind(chew, p, i, ref(config), false)); } for (int i = 0; i < config.ncores; i++) { check(pthread_join(ts[i], NULL) == 0); } + check(0 == pthread_barrier_destroy(&cross_barrier)); } else { // Chew the memory area from each core in sequence. for (int i = 0; i < config.ncores; i++) { @@ -208,6 +257,8 @@ } free(p); + ofstream trash("/dev/null"); + trash << "result: " << global_sum << endl; return 0; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |