[Assorted-commits] SF.net SVN: assorted: [407] numa-bench/trunk/src/malloc.cc

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 407
          http://assorted.svn.sourceforge.net/assorted/?rev=407&view=rev
Author:   yangzhang
Date:     2008-02-13 09:53:30 -0800 (Wed, 13 Feb 2008)

Log Message:
-----------
added cross-comm

Modified Paths:
--------------
    numa-bench/trunk/src/malloc.cc

Modified: numa-bench/trunk/src/malloc.cc
===================================================================

--- numa-bench/trunk/src/malloc.cc	2008-02-13 17:53:16 UTC (rev 406)
+++ numa-bench/trunk/src/malloc.cc	2008-02-13 17:53:30 UTC (rev 407)
@@ -20,6 +20,7 @@
 // TODO: use real shuffling? or is rand ok?
 
 #include <cstdlib>
+#include <fstream>
 #include <iostream>
 
 #include <sched.h>
@@ -35,6 +36,8 @@
 using namespace commons;
 using namespace std;
 
+pthread_barrier_t cross_barrier;
+
 struct config
 {
   /**
@@ -79,25 +82,27 @@
    * Do writes, otherwise just do reads.
    */
   const bool write;
+
+  /**
+   * Test cross-communication (use partitions), otherwise use either the
+   * global/local buffer.
+   */
+  const bool cross;
 };
 
+void*** partitions;
+int global_sum;
+
 /**
- * \param pp The start of the buffer to chew.
- * \param cpu Which CPU to pin our thread to.
- * \param config The experiment configuration parameters.
+ * \param pp The buffer to chew.
+ * \param config The experiment configuration.
+ * \param len Length of the buffer.
  */
-void*
-chew(void* pp, unsigned int cpu, const config & config, const char* label)
+void
+chew1(void* pp, config config, size_t len)
 {
-  int* p = (int*) (config.local ? malloc(config.size) : pp);
-  const size_t count = config.size / sizeof(int);
-  timer t(": ");
-
-  // Pin this thread to cpu `cpu`.
-  if (config.pin) {
-    pin_thread(cpu);
-  }
-
+  int* p = (int*) pp;
+  const size_t count = len / sizeof(int);
   int sum = 0;
   if (config.write) {
     // Write to the region.
@@ -139,11 +144,44 @@
       }
     }
   }
+  global_sum += sum;
+}
 
+/**
+ * \param pp The start of the buffer to chew.
+ * \param cpu Which CPU to pin our thread to.
+ * \param config The experiment configuration parameters.
+ * \param warmup True for the warmup run: prints a "warmup: " prefix and skips the cross-communication test.
+ */
+void*
+chew(void* pp, unsigned int cpu, const config & config, bool warmup)
+{
+  // Pin this thread to cpu `cpu`.
+  if (config.pin) {
+    pin_thread(cpu);
+  }
+
+  void* p = config.local ? malloc(config.size) : pp;
+  timer t(": ");
+
+  if (!warmup && config.cross) {
+    size_t len = config.size / config.ncores;
+    for (int i = 0; i < config.ncores; i++) {
+      partitions[cpu][i] = new char[len];
+    }
+    int barrier_result = pthread_barrier_wait(&cross_barrier);
+    check(barrier_result == PTHREAD_BARRIER_SERIAL_THREAD || barrier_result == 0);
+    for (int i = 0; i < config.ncores; i++) {
+      chew1(partitions[i][cpu], config, len);
+    }
+  } else {
+    chew1(p, config, config.size);
+  }
+
   // Print the elapsed time and "result".
-  cout << label << cpu;
+  if (warmup) cout << "warmup: " << endl;
+  cout << cpu;
   t.print();
-  cout << "result: " << sum;
 
   if (config.local) free(p);
 
@@ -156,7 +194,7 @@
   // So that our global shared malloc takes place on the CPU 0's node.
   pin_thread(0);
 
-  if (argc < 9) {
+  if (argc < 10) {
     cerr << argv[0] <<
       " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl;
     return 1;
@@ -171,7 +209,8 @@
     atoi(argv[5]),
     atoi(argv[6]),
     atoi(argv[7]),
-    atoi(argv[8])
+    atoi(argv[8]),
+    atoi(argv[9])
   };
 
   cout << "config:"
@@ -182,24 +221,34 @@
        << " par "     << config.par
        << " pin "     << config.pin
        << " local "   << config.local
-       << " write "   << config.write << endl;
+       << " write "   << config.write
+       << " cross "   << config.cross << endl;
 
   checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough");
 
   void *p = malloc(config.size);
+  check(p != NULL);
 
+  if (config.cross) {
+    partitions = new void**[config.ncores];
+    for (unsigned int i  = 0; i < config.ncores; i++)
+      partitions[i] = new void*[config.ncores];
+  }
+
   // Warmup.
-  chew(p, 0, config, "warmup: ");
+  chew(p, 0, config, true);
 
   if (config.par) {
     // Chew the memory area from each core in parallel (and also chew own).
     pthread_t ts[config.ncores];
+    check(0 == pthread_barrier_init(&cross_barrier, NULL, config.ncores));
     for (int i = 0; i < config.ncores; i++) {
-      ts[i] = spawn(bind(chew, p, i, ref(config), ""));
+      ts[i] = spawn(bind(chew, p, i, ref(config), false));
     }
     for (int i = 0; i < config.ncores; i++) {
       check(pthread_join(ts[i], NULL) == 0);
     }
+    check(0 == pthread_barrier_destroy(&cross_barrier));
   } else {
     // Chew the memory area from each core in sequence.
     for (int i = 0; i < config.ncores; i++) {
@@ -208,6 +257,8 @@
   }
 
   free(p);
+  ofstream trash("/dev/null");
+  trash << "result: " << global_sum << endl;
 
   return 0;
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.