[Assorted-commits] SF.net SVN: assorted: [332] hash-join/trunk
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-07 00:47:03
|
Revision: 332
          http://assorted.svn.sourceforge.net/assorted/?rev=332&view=rev
Author:   yangzhang
Date:     2008-02-06 16:46:50 -0800 (Wed, 06 Feb 2008)

Log Message:
-----------
added prep and eval tools

Added Paths:
-----------
    hash-join/trunk/tools/
    hash-join/trunk/tools/DbPrep.scala
    hash-join/trunk/tools/LogProc.scala
    hash-join/trunk/tools/Makefile

Added: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala (rev 0)
+++ hash-join/trunk/tools/DbPrep.scala 2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,76 @@
+import commons.Control._
+import commons.Io._
+import java.util.regex._
+object DbPrep {
+  def extract(p: Pattern, s: String) = {
+    val m = p matcher s
+    m.find
+    (m group 1, m group 2)
+  }
+  def cleanTitle(line: String) = {
+    val t = line indexOf " "
+    if (t > 0) line take t else line
+  }
+  def main(args: Array[String]) {
+    val pMovie = Pattern compile """^([^\t]+)\t+(.*)$"""
+    val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$"""
+    val (doMovies, doActresses) = (true, true)
+    if (doMovies) {
+      using (TextReader("movies.list")) { r =>
+        using (TextWriter("movies.dat")) { w =>
+          var line = r.readLine
+          try {
+            var body = false
+            while (line != null) {
+              if (body && (line contains "----------------")) {
+                body = false
+              }
+              if (body && line != "") {
+                val (title, release) = extract(pMovie, line)
+                w print (title + "\0" + release + "\0\0")
+              }
+              if (!body && (line contains "=======")) {
+                body = true
+              }
+              line = r.readLine
+            }
+          } catch {
+            case e: Exception => { Console.err.println(line); throw e }
+          }
+        }
+      }
+    }
+    if (doActresses) {
+      using (TextReader("actresses.list")) { r =>
+        using (TextWriter("actresses.dat")) { w =>
+          var line = r.readLine
+          try {
+            var body = false
+            while (line != null) {
+              if (body && (line contains "----------------")) {
+                body = false
+              }
+              if (body && line != "") {
+                val (actress, title) = extract(pActress, line)
+                w print (actress + "\0" + cleanTitle(title) + "\0")
+                while (line != "") {
+                  line = r.readLine.trim
+                  if (line != "") {
+                    w print (cleanTitle(title) + "\0")
+                  }
+                }
+                w print "\0"
+              }
+              if (!body && ((line contains "\t") && (line startsWith "----") && (line endsWith "----"))) {
+                body = true
+              }
+              line = r.readLine
+            }
+          } catch {
+            case e: Exception => { Console.err.println(line); throw e }
+          }
+        }
+      }
+    }
+  }
+}

Added: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala (rev 0)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,98 @@
+import commons.Collections._
+import commons.Control._
+import commons.Io._
+// import commons.Plotting._
+import scala.collection.mutable._
+
+object LogProc {
+  type FieldMap = Map[String,Int]
+  type MutFieldMap = HashMap[String,Int]
+  case class Stats(
+    ncpus: Int,
+    values: FieldMap
+  )
+  val descriptors = Array(
+    ("movieLoading", "loading movies" ),
+    ("actressLoading", "loading actresses" ),
+    ("moviePartitioning", "hash-partitioning movies" ),
+    ("actressPartitioning", "hash-partitioning actresses" ),
+    ("movieBuilding", "building with movies" ),
+    ("actressProbing", "probing with actresses" ),
+    ("sum", "sum" )
+  )
+  val fieldNameToLabel = Map(descriptors: _*)
+  def fieldName(k: Int) = descriptors(k)._1
+  def main(args: Array[String]) {
+    val lines = using (TextReader(args(0))) (_.readLines.toArray)
+    val map = new MutFieldMap
+    var ncpus = 0
+    val stats = new ArrayBuffer[Stats]
+    var fieldIndex = Iterator from 0
+
+    // Parse logs into Stats.
+    for (line <- lines) {
+      if (line contains " cpus") {
+        // Include sum.
+        map("sum") = sum(map.values)
+        if (ncpus != 0) stats += Stats(ncpus, map.clone)
+        ncpus = line.split(" ")(1).toInt
+        fieldIndex = Iterator from 0
+        map.clear
+      } else if (line contains "main time: ") {
+        map(fieldName(fieldIndex.next)) = line.split(" ").last.toInt
+      }
+    }
+
+    // Build actual plot data.
+    val plotData = new HashMap[String,ArrayBuffer[Int]] {
+      override def default(k: String) = {
+        val buf = new ArrayBuffer[Int]
+        this(k) = buf
+        buf
+      }
+    }
+    val ncpuList = stats map (_.ncpus)
+    for (Stats(ncpus, map) <- stats) {
+      for (field <- map.keys) {
+        plotData(field) += map(field)
+      }
+    }
+
+    // Produce the time and speedup .dats.
+    for ((field,times) <- plotData) {
+      val baseline = times(0).toDouble
+      println(field + ": " + times)
+      using (TextWriter(camelToHyphen(field) + "-time.dat")) { w =>
+        for ((time,ncpus) <- times zip ncpuList) {
+          w.println(ncpus + " " + time)
+        }
+      }
+      using (TextWriter(camelToHyphen(field) + "-speedup.dat")) { w =>
+        for ((time,ncpus) <- times map (baseline / _) zip ncpuList) {
+          w.println(ncpus + " " + time)
+        }
+      }
+    }
+
+    // Instruct gnuplot.
+    def f(s:String) = {
+      {
+        for ((field,_) <- map) yield (
+          "'" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'"
+        )
+      } mkString ", "
+    }
+    run("gnuplot", """
+      set terminal pdf
+      set xlabel 'number of threads'
+
+      set output 'times.pdf'
+      set ylabel 'time (ms)'
+      plot """ + f("-time") + """
+
+      set output 'speedups.pdf'
+      set ylabel 'speedup (relative to 1 thread)'
+      plot """ + f("-speedup")
+    )
+  }
+}

Added: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile (rev 0)
+++ hash-join/trunk/tools/Makefile 2008-02-07 00:46:50 UTC (rev 332)
@@ -0,0 +1,24 @@
+COMMONS_SRCS := $(wildcard commons/*.scala)
+DBPREP_SRCS := DbPrep.scala $(COMMONS_SRCS)
+LOGPREP_SRCS := LogProc.scala $(COMMONS_SRCS)
+
+all: out/DbPrep.class out/LogProc.class
+
+out/DbPrep.class: $(DBPREP_SRCS)
+	mkdir -p out
+	fsc -deprecation -d out $^
+
+out/LogProc.class: $(LOGPREP_SRCS)
+	mkdir -p out
+	fsc -deprecation -d out $^
+
+run: out/DbPrep.class
+	scala -cp out DbPrep
+
+proc: out/LogProc.class
+	scala -cp out LogProc log
+
+clean:
+	rm -rf out
+
+.PHONY: clean run

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |