[Assorted-commits] SF.net SVN: assorted: [386] numa-bench/trunk/src/malloc.cc
From: <yan...@us...> - 2008-02-12 01:48:02
Revision: 386
          http://assorted.svn.sourceforge.net/assorted/?rev=386&view=rev
Author:   yangzhang
Date:     2008-02-11 17:47:59 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
nice malloc test

Modified Paths:
--------------
    numa-bench/trunk/src/malloc.cc

Modified: numa-bench/trunk/src/malloc.cc
===================================================================
--- numa-bench/trunk/src/malloc.cc	2008-02-12 01:22:31 UTC (rev 385)
+++ numa-bench/trunk/src/malloc.cc	2008-02-12 01:47:59 UTC (rev 386)
@@ -1,5 +1,22 @@
-// Does malloc tend to allocate locally?
+// Questions this program answers:
+//
+// - Does malloc tend to allocate locally?
+//   - Yes. Times when working from the local node are lower.
+// - How much does working from another node affect throughput?
+//   - A bit: 647x from local, 649x from neighbor, 651x from remote.
+// - Is there a difference between repeatedly fetching the same (large) area n
+//   times and fetching an area n times larger?
+//   - No. The times are identical for 1GB*1 and 100MB*10.
+// - How much difference is there between sequential scan and random access?
+//   - Huge difference. It also magnifies the locality effects.
+//   - 1700 from local, 1990 from one neighbor, 2020 from another neighbor,
+//     and 2310 from remote.
+// - Can we observe prefetching's effects? (Random access, but chew the full
+//   cache line of data.)
+//   - TODO!
+// TODO: use real shuffling? or is rand ok?
+
 #include <cstdlib>
 #include <iostream>
@@ -16,30 +33,44 @@
 using namespace commons;
 using namespace std;
 
-const size_t size = 10000000;
-
+/**
+ * \param pp The start of the buffer to chew.
+ * \param core Which core to pin our thread to.
+ * \param size The size of the buffer.
+ * \param nreps The number of times to chew through the buffer.
+ * \param shuffle If false, chew through sequentially; otherwise, randomly
+ *        shuffle the indexes we chew through.
+ */
 void*
-chew(void* pp, int core)
+chew(void* pp, int core, size_t size, int nreps, bool shuffle)
 {
   char* p = (char*) pp;
-  const int reps = 100;
   pid_t pid = gettid();
   timer t(": ");
 
-  // Pin this thread to the right processor.
+  // Pin this thread to core `core`.
   cpu_set_t cs;
   CPU_ZERO(&cs);
   CPU_SET(core, &cs);
   sched_setaffinity(pid, sizeof(cs), &cs);
 
-  for (int c = 0; c < reps; c++) {
-    for (size_t i = 0; i < size; i++) {
-      p[i] = i;
+  // Write through the memory region, in random or sequential order.
+  if (shuffle) {
+    for (int c = 0; c < nreps; c++) {
+      for (size_t i = 0; i < size; i++) {
+        p[rand() % size] = i;
+      }
     }
+  } else {
+    for (int c = 0; c < nreps; c++) {
+      for (size_t i = 0; i < size; i++) {
+        p[i] = i;
+      }
+    }
   }
 
   // Print the elapsed time.
-  cout << pid;
+  cout << core;
   t.print();
   return NULL;
 }
@@ -47,33 +78,31 @@
 int
 main(int argc, char** argv)
 {
-  if (argc < 2) {
-    cerr << "malloc <nthreads>" << endl;
+  if (argc < 5) {
+    cerr << argv[0] << " <ncores> <size> <nreps> <shuffle>" << endl;
     return 1;
   }
 
-  const int n = atoi(argv[1]);
+  // Parse command-line arguments.
+  const int ncores = atoi(argv[1]);
+  const size_t size = atoi(argv[2]);
+  const int nreps = atoi(argv[3]);
+  const bool shuffle = atoi(argv[4]);
+
   void *p = malloc(size);
 
-  // warmup
-  chew(p, 0);
-  pthread_t ts[n];
+  // Warmup.
+  cout << "warmup: ";
+  chew(p, 0, size, nreps, shuffle);
 
-  // start thread on each core
+  // Chew the memory area from each core.
+  for (int i = 0; i < ncores; i++) {
     pthread_t t;
-    check((t = spawn(bind(chew, p, i))) != 0);
+    check((t = spawn(bind(chew, p, i, size, nreps, shuffle))) != 0);
     check(pthread_join(t, NULL) == 0);
   }
-  // waitall(ts, n);
-  return 0;
 
-  // THRASH
+  free(p);
 
-  // spawn workers
-  for (int i = 0; i < n; i++) {
-    check((ts[i] = spawn(bind(chew, p, i))) == 0);
-  }
-  waitall(ts, n);
 
   return 0;
 }
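
The committed shuffle branch picks indexes with rand() % size, and the comment block leaves "use real shuffling? or is rand ok?" as a TODO. The sketch below shows one way "real shuffling" could look, assuming a pre-C++11 toolchain (hence std::random_shuffle rather than std::shuffle): build a permutation of the indexes once, shuffle it, then walk it, so every byte is touched exactly once per rep, matching the work done by the sequential scan. The chew_shuffled name and the tiny demo main are illustrative only and not part of this commit.

// Illustrative sketch only -- not part of the commit.
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <vector>

// Hypothetical variant of the shuffle branch in chew().
void chew_shuffled(char* p, size_t size, int nreps)
{
  // Build the identity permutation of [0, size) and shuffle it once.
  std::vector<size_t> idx(size);
  for (size_t i = 0; i < size; i++)
    idx[i] = i;
  std::random_shuffle(idx.begin(), idx.end());  // pre-C++11; std::shuffle today

  // Chew the buffer in the shuffled order: every byte is hit once per rep.
  for (int c = 0; c < nreps; c++)
    for (size_t i = 0; i < size; i++)
      p[idx[i]] = (char) i;
}

int main()
{
  const size_t size = 1 << 20;           // 1MB demo buffer
  char* p = (char*) malloc(size);
  chew_shuffled(p, size, 10);
  free(p);
  return 0;
}

One caveat with this approach: the permutation costs 8 bytes per element, so for the gigabyte-sized runs mentioned in the comments a shuffle at cache-line or page granularity would be the more practical variant.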
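The "does malloc tend to allocate locally?" question comes down to first-touch placement: by default Linux places a page on the node of the CPU that first writes it, which is likely why the warmup chew from core 0 matters here. Below is a hedged sketch of making the placement explicit with libnuma instead of relying on malloc plus first-touch. numa_available, numa_alloc_onnode, and numa_free are real libnuma calls, but the program, the file name in the build comment, and the choice of node 0 are assumptions for illustration, not part of this commit.

// Illustrative sketch only -- not part of the commit.
// Build with: g++ numa_place.cc -lnuma   (file name is hypothetical)
#include <numa.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

int main()
{
  if (numa_available() < 0) {
    std::fprintf(stderr, "NUMA is not available on this system\n");
    return 1;
  }

  const size_t size = 100 * 1024 * 1024;  // 100MB, one of the sizes tested above
  const int node = 0;                      // hypothetical target node

  // Pages of this buffer come from `node`, no matter which core touches them first.
  char* p = (char*) numa_alloc_onnode(size, node);
  if (p == NULL)
    return 1;

  // Chew once, as the benchmark's warmup does.
  for (size_t i = 0; i < size; i++)
    p[i] = (char) i;

  numa_free(p, size);
  return 0;
}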