Thread: [Assorted-commits] SF.net SVN: assorted: [332] hash-join/trunk
From: <yan...@us...> - 2008-02-07 00:47:03
Revision: 332
          http://assorted.svn.sourceforge.net/assorted/?rev=332&view=rev
Author:   yangzhang
Date:     2008-02-06 16:46:50 -0800 (Wed, 06 Feb 2008)

Log Message:
-----------
added prep and eval tools

Added Paths:
-----------
    hash-join/trunk/tools/
    hash-join/trunk/tools/DbPrep.scala
    hash-join/trunk/tools/LogProc.scala
    hash-join/trunk/tools/Makefile

Added: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala          (rev 0)
+++ hash-join/trunk/tools/DbPrep.scala  2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,76 @@
+import commons.Control._
+import commons.Io._
+import java.util.regex._
+object DbPrep {
+  def extract(p: Pattern, s: String) = {
+    val m = p matcher s
+    m.find
+    (m group 1, m group 2)
+  }
+  def cleanTitle(line: String) = {
+    val t = line indexOf " "
+    if (t > 0) line take t else line
+  }
+  def main(args: Array[String]) {
+    val pMovie = Pattern compile """^([^\t]+)\t+(.*)$"""
+    val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$"""
+    val (doMovies, doActresses) = (true, true)
+    if (doMovies) {
+      using (TextReader("movies.list")) { r =>
+        using (TextWriter("movies.dat")) { w =>
+          var line = r.readLine
+          try {
+            var body = false
+            while (line != null) {
+              if (body && (line contains "----------------")) {
+                body = false
+              }
+              if (body && line != "") {
+                val (title, release) = extract(pMovie, line)
+                w print (title + "\0" + release + "\0\0")
+              }
+              if (!body && (line contains "=======")) {
+                body = true
+              }
+              line = r.readLine
+            }
+          } catch {
+            case e: Exception => { Console.err.println(line); throw e }
+          }
+        }
+      }
+    }
+    if (doActresses) {
+      using (TextReader("actresses.list")) { r =>
+        using (TextWriter("actresses.dat")) { w =>
+          var line = r.readLine
+          try {
+            var body = false
+            while (line != null) {
+              if (body && (line contains "----------------")) {
+                body = false
+              }
+              if (body && line != "") {
+                val (actress, title) = extract(pActress, line)
+                w print (actress + "\0" + cleanTitle(title) + "\0")
+                while (line != "") {
+                  line = r.readLine.trim
+                  if (line != "") {
+                    w print (cleanTitle(title) + "\0")
+                  }
+                }
+                w print "\0"
+              }
+              if (!body && ((line contains "\t") && (line startsWith "----") && (line endsWith "----"))) {
+                body = true
+              }
+              line = r.readLine
+            }
+          } catch {
+            case e: Exception => { Console.err.println(line); throw e }
+          }
+        }
+      }
+    }
+  }
+}

Added: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala         (rev 0)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,98 @@
+import commons.Collections._
+import commons.Control._
+import commons.Io._
+// import commons.Plotting._
+import scala.collection.mutable._
+
+object LogProc {
+  type FieldMap = Map[String,Int]
+  type MutFieldMap = HashMap[String,Int]
+  case class Stats(
+    ncpus: Int,
+    values: FieldMap
+  )
+  val descriptors = Array(
+    ("movieLoading",        "loading movies"              ),
+    ("actressLoading",      "loading actresses"           ),
+    ("moviePartitioning",   "hash-partitioning movies"    ),
+    ("actressPartitioning", "hash-partitioning actresses" ),
+    ("movieBuilding",       "building with movies"        ),
+    ("actressProbing",      "probing with actresses"      ),
+    ("sum",                 "sum"                         )
+  )
+  val fieldNameToLabel = Map(descriptors: _*)
+  def fieldName(k: Int) = descriptors(k)._1
+  def main(args: Array[String]) {
+    val lines = using (TextReader(args(0))) (_.readLines.toArray)
+    val map = new MutFieldMap
+    var ncpus = 0
+    val stats = new ArrayBuffer[Stats]
+    var fieldIndex = Iterator from 0
+
+    // Parse logs into Stats.
+    for (line <- lines) {
+      if (line contains " cpus") {
+        // Include sum.
+        map("sum") = sum(map.values)
+        if (ncpus != 0) stats += Stats(ncpus, map.clone)
+        ncpus = line.split(" ")(1).toInt
+        fieldIndex = Iterator from 0
+        map.clear
+      } else if (line contains "main time: ") {
+        map(fieldName(fieldIndex.next)) = line.split(" ").last.toInt
+      }
+    }
+
+    // Build actual plot data.
+    val plotData = new HashMap[String,ArrayBuffer[Int]] {
+      override def default(k: String) = {
+        val buf = new ArrayBuffer[Int]
+        this(k) = buf
+        buf
+      }
+    }
+    val ncpuList = stats map (_.ncpus)
+    for (Stats(ncpus, map) <- stats) {
+      for (field <- map.keys) {
+        plotData(field) += map(field)
+      }
+    }
+
+    // Produce the time and speedup .dats.
+    for ((field,times) <- plotData) {
+      val baseline = times(0).toDouble
+      println(field + ": " + times)
+      using (TextWriter(camelToHyphen(field) + "-time.dat")) { w =>
+        for ((time,ncpus) <- times zip ncpuList) {
+          w.println(ncpus + " " + time)
+        }
+      }
+      using (TextWriter(camelToHyphen(field) + "-speedup.dat")) { w =>
+        for ((time,ncpus) <- times map (baseline / _) zip ncpuList) {
+          w.println(ncpus + " " + time)
+        }
+      }
+    }
+
+    // Instruct gnuplot.
+    def f(s:String) = {
+      {
+        for ((field,_) <- map) yield (
+          "'" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'"
+        )
+      } mkString ", "
+    }
+    run("gnuplot", """
+      set terminal pdf
+      set xlabel 'number of threads'
+
+      set output 'times.pdf'
+      set ylabel 'time (ms)'
+      plot """ + f("-time") + """
+
+      set output 'speedups.pdf'
+      set ylabel 'speedup (relative to 1 thread)'
+      plot """ + f("-speedup")
+    )
+  }
+}

Added: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile              (rev 0)
+++ hash-join/trunk/tools/Makefile      2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,24 @@
+COMMONS_SRCS := $(wildcard commons/*.scala)
+DBPREP_SRCS := DbPrep.scala $(COMMONS_SRCS)
+LOGPREP_SRCS := LogProc.scala $(COMMONS_SRCS)
+
+all: out/DbPrep.class out/LogProc.class
+
+out/DbPrep.class: $(DBPREP_SRCS)
+	mkdir -p out
+	fsc -deprecation -d out $^
+
+out/LogProc.class: $(LOGPREP_SRCS)
+	mkdir -p out
+	fsc -deprecation -d out $^
+
+run: out/DbPrep.class
+	scala -cp out DbPrep
+
+proc: out/LogProc.class
+	scala -cp out LogProc log
+
+clean:
+	rm -rf out
+
+.PHONY: clean run
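A note on the record format these tools emit: DbPrep writes NUL-delimited variable-length records, each movie as title\0release\0\0 and each actress as her name followed by a NUL-terminated list of titles closed by an empty field. The following is a rough C++ sketch of how a consumer might walk such a buffer once it is read into memory; it is hypothetical, for illustration only, and the project's actual loader in src/hashjoin.cc may parse these files differently.

    #include <cstdio>
    #include <cstring>

    // Walk a buffer holding NUL-delimited movie records of the form
    // title '\0' release '\0' '\0', as written by DbPrep above.
    // Hypothetical reader for illustration; the real loader may differ.
    void walk_movies(const char *p, const char *end) {
      while (p < end) {
        const char *title = p;
        p += strlen(p) + 1;   // past the title and its terminating NUL
        const char *release = p;
        p += strlen(p) + 1;   // past the release year
        p += 1;               // past the empty field that ends the record
        printf("%s (%s)\n", title, release);
      }
    }

The trailing empty field makes record boundaries detectable without any length prefix, at the cost of forbidding empty fields inside a record.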
From: <yan...@us...> - 2008-02-14 20:33:13
Revision: 413
          http://assorted.svn.sourceforge.net/assorted/?rev=413&view=rev
Author:   yangzhang
Date:     2008-02-14 12:33:16 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
added more doc and web publishing

Added Paths:
-----------
    hash-join/trunk/doc/
    hash-join/trunk/doc/Makefile
    hash-join/trunk/doc/analysis.txt

Added: hash-join/trunk/doc/Makefile
===================================================================
--- hash-join/trunk/doc/Makefile        (rev 0)
+++ hash-join/trunk/doc/Makefile        2008-02-14 20:33:16 UTC (rev 413)
@@ -0,0 +1,23 @@
+PROJECT := hash-join
+WEBDIR := assorted/htdocs/$(PROJECT)
+PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -o $@ $^
+
+all: index.html analysis.html
+
+index.html: ../README
+	$(PANDOC)
+
+analysis.html: analysis.txt
+	$(PANDOC)
+
+publish: analysis.html index.html
+	ssh shell-sf mkdir -p $(WEBDIR)/
+	scp $^ shell-sf:$(WEBDIR)/
+
+publish-data: times.pdf speedups.pdf
+	scp $^ shell-sf:$(WEBDIR)/
+
+clean:
+	rm -f index.html analysis.html
+
+.PHONY: clean publish

Added: hash-join/trunk/doc/analysis.txt
===================================================================
--- hash-join/trunk/doc/analysis.txt    (rev 0)
+++ hash-join/trunk/doc/analysis.txt    2008-02-14 20:33:16 UTC (rev 413)
@@ -0,0 +1,37 @@
+% Hash-Join Benchmarks
+% Yang Zhang
+
+Here are the graphs from the latest experiments and implementation:
+
+- [times](times.pdf)
+- [speedups](speedups.pdf)
+
+This implementation was originally not scalable in the hashtable-building
+stage, which performed frequent allocations. The hashtable is stock from the
+SGI/libstdc++ implementation. I removed this bottleneck by providing a custom
+allocator that allocated from a non-freeing local memory arena.
+
+Profiling reveals that most of the time is spent in the hash functions and the
+function that performs the memcpy during hash-partitioning. `actdb::partition1`
+is the hash-partitioning function for actresses, and it calls `push_bucket` to
+copy tuples into buckets. `scan` is just a function to touch all the data from
+the file.
+
+      %   cumulative   self              self     total
+     time   seconds   seconds    calls   s/call   s/call  name
+    16.40      0.82     0.82  4547797     0.00     0.00  commons::hash_djb2(char const*)
+    14.80      1.56     0.74  4547797     0.00     0.00  __gnu_cxx::__stl_hash_string(char const*)
+    13.20      2.22     0.66  4547797     0.00     0.00  db::push_bucket(char**, bucket*, char const*, char const*, unsigned long)
+    12.80      2.86     0.64        2     0.32     0.32  commons::scan(void const*, unsigned long)
+    10.80      3.40     0.54        1     0.54     1.78  actdb::partition1(unsigned int, bucket*)
+    ...
+
+Now the hashtable construction phase is the most scalable part of the
+algorithm. The remaining bottlenecks appear to be due to the memory stalls.
+
+The program does not scale much beyond the 16 threads, though performance does
+improve slightly. This is due to the contention for cache capacity among
+multiple hardware threads per core.
+
+This implementation is straightforward, with no fanciness in terms of custom
+scheduling and control over allocation, leaving many things up to the OS.
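The write-up above mentions a custom allocator over a non-freeing local memory arena but does not show it; that code lives in the C++ sources. The sketch below illustrates the general technique only. The class names, chunk size, and interface details are invented here, and the actual commons implementation may differ.

    #include <cstddef>
    #include <new>
    #include <vector>

    // Bump-pointer arena: carve allocations out of big chunks; individual
    // frees are impossible, and everything is released when the arena dies.
    class arena {
      std::vector<char*> chunks;
      char *cur;
      size_t left;
      enum { CHUNK = 1 << 20 };  // 1 MB per chunk (arbitrary choice)
    public:
      arena() : cur(0), left(0) {}
      ~arena() { for (size_t i = 0; i < chunks.size(); i++) delete[] chunks[i]; }
      void *alloc(size_t n) {
        n = (n + 7) & ~size_t(7);  // keep 8-byte alignment
        if (n > left) {
          size_t sz = n > size_t(CHUNK) ? n : size_t(CHUNK);
          cur = new char[sz];
          chunks.push_back(cur);
          left = sz;
        }
        void *p = cur;
        cur += n; left -= n;
        return p;
      }
    };

    // Minimal STL-style allocator over the arena; deallocate is a no-op,
    // so hashtable inserts stop hitting the global heap per element.
    template <class T> struct arena_alloc {
      typedef T value_type;   typedef T *pointer;  typedef const T *const_pointer;
      typedef T &reference;   typedef const T &const_reference;
      typedef size_t size_type;  typedef ptrdiff_t difference_type;
      template <class U> struct rebind { typedef arena_alloc<U> other; };
      arena *a;
      explicit arena_alloc(arena *a_) : a(a_) {}
      template <class U> arena_alloc(const arena_alloc<U> &o) : a(o.a) {}
      pointer allocate(size_type n, const void* = 0) {
        return static_cast<pointer>(a->alloc(n * sizeof(T)));
      }
      void deallocate(pointer, size_type) {}  // non-freeing: a no-op
      void construct(pointer p, const T &v) { new (p) T(v); }
      void destroy(pointer p) { p->~T(); }
      size_type max_size() const { return size_type(-1) / sizeof(T); }
    };

    template <class T, class U>
    bool operator==(const arena_alloc<T> &x, const arena_alloc<U> &y)
    { return x.a == y.a; }
    template <class T, class U>
    bool operator!=(const arena_alloc<T> &x, const arena_alloc<U> &y)
    { return !(x == y); }

Passing such an allocator as the hashtable's allocator parameter turns each insert's allocation into a pointer bump, which matches in spirit the scalability fix the analysis describes.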
From: <yan...@us...> - 2008-02-15 01:40:11
Revision: 415
          http://assorted.svn.sourceforge.net/assorted/?rev=415&view=rev
Author:   yangzhang
Date:     2008-02-14 17:40:15 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
updated analysis, readme, doc publishing

Modified Paths:
--------------
    hash-join/trunk/README
    hash-join/trunk/doc/Makefile
    hash-join/trunk/doc/analysis.txt

Modified: hash-join/trunk/README
===================================================================
--- hash-join/trunk/README      2008-02-14 20:33:35 UTC (rev 414)
+++ hash-join/trunk/README      2008-02-15 01:40:15 UTC (rev 415)
@@ -29,6 +29,9 @@
 there is a match, then emit the resulting joined tuple (movie title, movie
 release year, actress name).
 
+Results
+-------
+
 Here are some [results].
 
 Requirements
@@ -75,7 +78,7 @@
 this dataset and to observe the resulting distributions.
 
 [C++ Commons]: http://assorted.sf.net/cpp-commons/
-[HashDist]: http://assorted.sf.net/
+[HashDist]: http://assorted.svn.sourceforge.net/viewvc/assorted/hash-dist/trunk/
 [Multiprocessor Hash-Based Join Algorithms]: http://citeseer.ist.psu.edu/50143.html
 [Scala Commons]: http://assorted.sf.net/scala-commons/
 [g++]: http://gcc.gnu.org/

Modified: hash-join/trunk/doc/Makefile
===================================================================
--- hash-join/trunk/doc/Makefile        2008-02-14 20:33:35 UTC (rev 414)
+++ hash-join/trunk/doc/Makefile        2008-02-15 01:40:15 UTC (rev 415)
@@ -1,6 +1,7 @@
-PROJECT := hash-join
-WEBDIR := assorted/htdocs/$(PROJECT)
-PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -o $@ $^
+PROJECT := hash-join
+WEBDIR := assorted/htdocs/$(PROJECT)
+HTMLFRAG := ../../../assorted-site/trunk
+PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -H $(HTMLFRAG)/header.html -A $(HTMLFRAG)/google-footer.html -o $@ $^
 
 all: index.html analysis.html
 
@@ -14,10 +15,10 @@
 	ssh shell-sf mkdir -p $(WEBDIR)/
 	scp $^ shell-sf:$(WEBDIR)/
 
-publish-data: times.pdf speedups.pdf
+publish-data: ../tools/data/*.pdf
 	scp $^ shell-sf:$(WEBDIR)/
 
 clean:
 	rm -f index.html analysis.html
 
-.PHONY: clean publish
+.PHONY: clean publish publish-data

Modified: hash-join/trunk/doc/analysis.txt
===================================================================
--- hash-join/trunk/doc/analysis.txt    2008-02-14 20:33:35 UTC (rev 414)
+++ hash-join/trunk/doc/analysis.txt    2008-02-15 01:40:15 UTC (rev 415)
@@ -1,4 +1,4 @@
-% Hash-Join Benchmarks
+% Hash-Join Analysis
 % Yang Zhang
 
 Here are the graphs from the latest experiments and implementation:
@@ -9,7 +9,7 @@
 This implementation was originally not scalable in the hashtable-building
 stage, which performed frequent allocations. The hashtable is stock from the
 SGI/libstdc++ implementation. I removed this bottleneck by providing a custom
-allocator that allocated from a non-freeing local memory arena.
+allocator that allocates from a non-freeing local memory arena.
 
 Profiling reveals that most of the time is spent in the hash functions and the
 function that performs the memcpy during hash-partitioning. `actdb::partition1`
@@ -27,11 +27,14 @@
     ...
 
 Now the hashtable construction phase is the most scalable part of the
-algorithm. The remaining bottlenecks appear to be due to the memory stalls.
+algorithm (despite its random access nature). The remaining bottlenecks appear
+to be due to memory stalls, but these are mostly masked by hardware
+prefetching.
 
-The program does not scale much beyond the 16 threads, though performance does
-improve slightly. This is due to the contention for cache capacity among
-multiple hardware threads per core.
+The program does not scale much beyond 16 threads, though performance does
+improve slightly. The inability to scale beyond 16 is most likely due to the
+contention for cache capacity among multiple hardware threads per core.
 
-This implementation is straightforward, with no fanciness in terms of custom
-scheduling and control over allocation, leaving many things up to the OS.
+I've tried to keep the implementation simple, with no fanciness in terms of
+custom task scheduling or control over allocation, leaving many things up to
+the OS.
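An aside on the profile quoted in analysis.txt: commons::hash_djb2 is presumably Bernstein's well-known djb2 string hash, whose textbook form is a multiply-by-33 loop seeded with 5381. That form (shown below for reference; the commons version may differ in detail) does work proportional to key length, which is consistent with it topping a profile that hashes every title and name.

    // Bernstein's djb2 string hash: h = h*33 + c, seeded with 5381.
    // Textbook form for reference; commons::hash_djb2 may vary.
    unsigned long hash_djb2(const char *s) {
      unsigned long h = 5381;
      for (int c; (c = (unsigned char)*s++) != 0; )
        h = h * 33 + c;  // often written ((h << 5) + h) + c
      return h;
    }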
From: <yan...@us...> - 2008-02-16 04:46:41
Revision: 455
          http://assorted.svn.sourceforge.net/assorted/?rev=455&view=rev
Author:   yangzhang
Date:     2008-02-15 20:46:41 -0800 (Fri, 15 Feb 2008)

Log Message:
-----------
added basic autotools

Added Paths:
-----------
    hash-join/trunk/Makefile.am
    hash-join/trunk/configure.ac

Added: hash-join/trunk/Makefile.am
===================================================================
--- hash-join/trunk/Makefile.am         (rev 0)
+++ hash-join/trunk/Makefile.am 2008-02-16 04:46:41 UTC (rev 455)
@@ -0,0 +1,2 @@
+bin_PROGRAMS = hashjoin
+hashjoin_SOURCES = src/hashjoin.cc

Added: hash-join/trunk/configure.ac
===================================================================
--- hash-join/trunk/configure.ac        (rev 0)
+++ hash-join/trunk/configure.ac        2008-02-16 04:46:41 UTC (rev 455)
@@ -0,0 +1,35 @@
+# TODO: header-checking does nothing
+# TODO: replace with simple-build
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.61)
+AC_INIT(hash-join, 0.1, gmail:yaaang)
+AM_INIT_AUTOMAKE(cppcommons, 0.1)
+AC_CONFIG_SRCDIR([src/hashjoin.cc])
+AC_CONFIG_HEADER([config.h])
+
+# Checks for programs.
+AC_PROG_CXX
+
+# Checks for libraries.
+#### AC_CHECK_LIB([profile], [main])
+AC_CHECK_LIB([pthread], [pthread_create])
+
+# Checks for header files.
+AC_CHECK_HEADERS([fcntl.h sys/time.h])
+AC_CHECK_HEADERS([boost/any.h])
+AC_CHECK_HEADERS([commons/check.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_C_INLINE
+AC_TYPE_SIZE_T
+
+# Checks for library functions.
+AC_HEADER_STDC
+AC_CHECK_FUNCS([strchr])
+
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
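These two files are only the autotools inputs. Building from them would presumably follow the standard bootstrap: `aclocal`, `autoconf`, `autoheader`, and `automake --add-missing` (or a single `autoreconf --install`), then `./configure && make` to produce the `hashjoin` binary from src/hashjoin.cc. As the TODO at the top of configure.ac admits, the header checks are not yet acted on by the build, so this is a starting point rather than a finished setup.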
From: <yan...@us...> - 2008-02-29 16:49:38
Revision: 548
          http://assorted.svn.sourceforge.net/assorted/?rev=548&view=rev
Author:   yangzhang
Date:     2008-02-29 08:49:43 -0800 (Fri, 29 Feb 2008)

Log Message:
-----------
publisher publishes data

Modified Paths:
--------------
    hash-join/trunk/publish.bash

Removed Paths:
-------------
    hash-join/trunk/doc/Makefile

Deleted: hash-join/trunk/doc/Makefile
===================================================================
--- hash-join/trunk/doc/Makefile        2008-02-29 16:49:18 UTC (rev 547)
+++ hash-join/trunk/doc/Makefile        2008-02-29 16:49:43 UTC (rev 548)
@@ -1,24 +0,0 @@
-PROJECT := hash-join
-WEBDIR := assorted/htdocs/$(PROJECT)
-HTMLFRAG := ../../../assorted-site/trunk
-PANDOC = pandoc -s -S --tab-stop=2 -c ../main.css -H $(HTMLFRAG)/header.html -A $(HTMLFRAG)/google-footer.html -o $@ $^
-
-all: index.html analysis.html
-
-index.html: ../README
-	$(PANDOC)
-
-analysis.html: analysis.txt
-	$(PANDOC)
-
-publish: analysis.html index.html
-	ssh shell-sf mkdir -p $(WEBDIR)/
-	scp $^ shell-sf:$(WEBDIR)/
-
-publish-data: ../tools/data/*.pdf
-	scp $^ shell-sf:$(WEBDIR)/
-
-clean:
-	rm -f index.html analysis.html
-
-.PHONY: clean publish publish-data

Modified: hash-join/trunk/publish.bash
===================================================================
--- hash-join/trunk/publish.bash        2008-02-29 16:49:18 UTC (rev 547)
+++ hash-join/trunk/publish.bash        2008-02-29 16:49:43 UTC (rev 548)
@@ -3,5 +3,5 @@
 project=hash-join
 clean=false
 websrcs=( README doc/analysis.txt )
-webfiles=()
+webfiles=( tools/data/*.pdf )
 . assorted.bash "$@"
From: <yan...@us...> - 2008-03-03 04:45:41
Revision: 580
          http://assorted.svn.sourceforge.net/assorted/?rev=580&view=rev
Author:   yangzhang
Date:     2008-03-02 20:45:44 -0800 (Sun, 02 Mar 2008)

Log Message:
-----------
updated for new publisher

Modified Paths:
--------------
    hash-join/trunk/README
    hash-join/trunk/publish.bash

Modified: hash-join/trunk/README
===================================================================
--- hash-join/trunk/README      2008-03-03 04:41:41 UTC (rev 579)
+++ hash-join/trunk/README      2008-03-03 04:45:44 UTC (rev 580)
@@ -1,6 +1,3 @@
-% Parallel Hash Join
-% Yang Zhang
-
 Overview
 --------
 
@@ -55,7 +52,7 @@
     $ svn --quiet co https://assorted.svn.sourceforge.net/svnroot/assorted/hash-join/trunk hash-join
     $ ln -s "$PWD/cpp-commons/src/commons" hash-join/src/
     $ cd hash-join/src/
-    $ make hashjoin-opt
+    $ CPATH="$PWD" make hashjoin-opt
     $ out/hashjoin-opt 16 $MOVIEDATA/{movies,actresses}.dat
 
 Supporting Tools

Modified: hash-join/trunk/publish.bash
===================================================================
--- hash-join/trunk/publish.bash        2008-03-03 04:41:41 UTC (rev 579)
+++ hash-join/trunk/publish.bash        2008-03-03 04:45:44 UTC (rev 580)
@@ -1,7 +1,10 @@
 #!/usr/bin/env bash
 
-project=hash-join
-clean=false
+fullname='Parallel Hash-Join'
+version=0.1
+license=gpl3
 websrcs=( README doc/analysis.txt )
 webfiles=( tools/data/*.pdf )
+rels=( src-tgz: )
+nodl=true
 . assorted.bash "$@"