[Assorted-commits] SF.net SVN: assorted: [401] numa-bench/trunk/src/malloc.cc

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 401
          http://assorted.svn.sourceforge.net/assorted/?rev=401&view=rev
Author:   yangzhang
Date:     2008-02-12 23:58:13 -0800 (Tue, 12 Feb 2008)

Log Message:
-----------
beefed up the test

Modified Paths:
--------------
    numa-bench/trunk/src/malloc.cc

Modified: numa-bench/trunk/src/malloc.cc
===================================================================

--- numa-bench/trunk/src/malloc.cc	2008-02-13 07:58:00 UTC (rev 400)
+++ numa-bench/trunk/src/malloc.cc	2008-02-13 07:58:13 UTC (rev 401)
@@ -1,7 +1,7 @@
 // Questions this program answers:
 //
 // - Does malloc tend to allocate locally?
-//   - Yes. Times working from local node is lower.
+//   - TODO!
 // - How much does working from another node affect throughput?
 //   - A bit: 647x from local, 649x from neighbor, 651x from remote
 // - Is there difference from repeatedly fetching the same (large) area n times
@@ -21,6 +21,7 @@
 
 #include <cstdlib>
 #include <iostream>
+#include <iomanip>
 
 #include <sched.h>
 
@@ -35,77 +36,166 @@
 using namespace commons;
 using namespace std;
 
+struct config
+{
+  /**
+   * The number of cores to test. This is a parameter (rather than
+   * auto-detected) because it additionally serves to mean the number of cores
+   * we want to test in parallel. As this program evolves, these may be
+   * separated.
+   */
+  const int ncores;
+
+  /**
+   * Size in bytes of the buffer to chew.
+   */
+  const size_t size;
+
+  /**
+   * Number of repetitions to chew.
+   */
+  const int nreps;
+
+  /**
+   * Perform random access; otherwise do a sequential scan.
+   */
+  const bool shuffle;
+
+  /**
+   * Chew in parallel, otherwise each core chews serially.
+   */
+  const bool par;
+
+  /**
+   * Pin thread i to core i, otherwise let the OS manage things.
+   */
+  const bool pin;
+
+  /**
+   * Chew my own memory, otherwise chew the given (shared) memory.
+   */
+  const bool local;
+
+  /**
+   * Do writes, otherwise just do reads.
+   */
+  const bool write;
+};
+
 /**
  * \param pp The start of the buffer to chew.
- * \param core Which core to pin our thread to.
- * \param size The size of the buffer.
- * \param nreps The number of times to chew through the buffer.
- * \param shuffle If false, sequentially chew through; otherwise, randomly
- *     shuffle the indexes we chew through.
+ * \param cpu Which CPU to pin our thread to.
+ * \param config The experiment configuration parameters.
  */
 void*
-chew(void* pp, unsigned int core, size_t size, unsigned int nreps, bool shuffle)
+chew(void* pp, unsigned int cpu, const config & config, const char* label)
 {
-  int* p = (int*) pp;
-  const size_t count = size / sizeof(int);
-  pid_t pid = gettid();
+  int* p = (int*) (config.local ? malloc(config.size) : pp);
+  const size_t count = config.size / sizeof(int);
   timer t(": ");
 
-  // Pin this thread to core `core`.
-  cpu_set_t cs;
-  CPU_ZERO(&cs);
-  CPU_SET(core, &cs);
-  sched_setaffinity(pid, sizeof(cs), &cs);
+  // Pin this thread to cpu `cpu`.
+  if (config.pin) {
+    pin_thread(cpu);
+  }
 
-  // Write sequentially to the memory region.
-  if (shuffle) {
-    for (unsigned int c = 0; c < nreps; c++) {
-      for (size_t i = 0; i < count; i++) {
-        // NOTE: Using r as the index assumes that rand generates large-enough
-        // values.
-        int r = rand();
-        p[r % count] += r;
+  if (config.write) {
+    // Write to the region.
+    if (config.shuffle) {
+      // Random access into the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          // NOTE: Using r as the index assumes that rand generates large-enough
+          // values.
+          int r = rand();
+          p[r % count] += r;
+        }
       }
+    } else {
+      // Sequential scan through the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          p[i] += rand();
+        }
+      }
     }
   } else {
-    for (unsigned int c = 0; c < nreps; c++) {
-      for (size_t i = 0; i < count; i++) {
-        p[i] += rand();
+    // Only read from the region.
+    int sum = 0;
+    if (config.shuffle) {
+      // Random access into the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          // NOTE: Using rand() as the index assumes that rand generates
+          // large-enough values.
+          sum += p[rand() % count];
+        }
       }
+    } else {
+      // Sequential scan through the memory region.
+      for (unsigned int c = 0; c < config.nreps; c++) {
+        for (size_t i = 0; i < count; i++) {
+          sum += p[i] + rand();
+        }
+      }
     }
+    cout << sum << endl;
   }
 
   // Print the elapsed time.
-  cout << core;
+  cout << label << cpu;
   t.print();
+
+  if (config.local) free(p);
+
   return NULL;
 }
 
 int
 main(int argc, char** argv)
 {
-  if (argc < 5) {
-    cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl;
+  // So that our global shared malloc takes place on CPU 0's node.
+  pin_thread(0);
+
+  if (argc < 9) {
+    cerr << argv[0] <<
+      " <ncores> <size> <nreps> <shuffle> <par> <pin> <local> <write>" << endl;
     return 1;
   }
 
-  // Parse command-line arguments.
-  const int ncores = atoi(argv[1]);
-  const size_t size = atoi(argv[2]);
-  const int nreps = atoi(argv[3]);
-  const bool shuffle = atoi(argv[4]);
+  // Parse command-line arguments. TODO
+  const config config = { 
+    atoi(argv[1]),
+    atoi(argv[2]),
+    atoi(argv[3]),
+    atoi(argv[4]),
+    atoi(argv[5]),
+    atoi(argv[6]),
+    atoi(argv[7]),
+    atoi(argv[8])
+  };
 
-  void *p = malloc(size);
+  checkmsg(RAND_MAX > config.size / sizeof(int), "PRNG range not large enough");
 
+  void *p = malloc(config.size);
+
   // Warmup.
-  cout << "warmup: ";
-  chew(p, 0, size, nreps, shuffle);
+  chew(p, 0, config, "warmup: ");
 
-  // Chew the memory area from each core.
-  for (int i = 0; i < ncores; i++) {
-    pthread_t t;
-    check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0);
-    check(pthread_join(t, NULL) == 0);
+  if (config.par) {
+    // Chew the memory area from each core in parallel (or each core's own buffer when config.local is set).
+    pthread_t ts[config.ncores];
+    for (int i = 0; i < config.ncores; i++) {
+      ts[i] = spawn(bind(chew, p, i, ref(config), ""));
+    }
+    for (int i = 0; i < config.ncores; i++) {
+      check(pthread_join(ts[i], NULL) == 0);
+    }
+  } else {
+    // Chew the memory area from each core in sequence.
+    for (int i = 0; i < config.ncores; i++) {
+      chew(p, i, config, "");
+    }
   }
 
   free(p);


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.