[Assorted-commits] SF.net SVN: assorted: [377] hash-join/trunk/tools
From: <yan...@us...> - 2008-02-11 17:03:31
Revision: 377
          http://assorted.svn.sourceforge.net/assorted/?rev=377&view=rev
Author:   yangzhang
Date:     2008-02-11 09:03:34 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
moved to simpler, more generic log processor

Modified Paths:
--------------
    hash-join/trunk/tools/LogProc.scala
    hash-join/trunk/tools/Makefile

Modified: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala	2008-02-11 17:03:04 UTC (rev 376)
+++ hash-join/trunk/tools/LogProc.scala	2008-02-11 17:03:34 UTC (rev 377)
@@ -3,77 +3,65 @@
 import commons.Io._
 import commons.Debug._
 import commons.Path._
-// import commons.Plotting._
 import scala.collection.mutable._
+import scala.util._
 
 object LogProc {
   type FieldMap = Map[String,Double]
   type MutFieldMap = HashMap[String,Double]
-  case class Stats(
-    ncpus: Int,
-    values: FieldMap
-  )
-  val descriptors = Array(
-    ("movieLoading",        "loading movies"),
-    ("actressLoading",      "loading actresses"),
-    ("moviePartitioning",   "hash-partitioning movies"),
-    ("actressPartitioning", "hash-partitioning actresses"),
-    ("movieBuilding",       "building with movies"),
-    ("actressProbing",      "probing with actresses"),
-    ("sum",                 "sum")
-  )
-  val fieldNameToLabel = Map(descriptors: _*)
-  def fieldName(k: Int) = descriptors(k)._1
+  type MutStatMap = HashMap[Int,ArrayBuffer[FieldMap]]
   def main(args: Array[String]) {
-    val lines = using (TextReader(args(0))) (_.readLines.toArray)
+    val indexer = args(0)
+    val lines = using (TextReader(Console.in)) (_.readLines.toArray)
     val map = new MutFieldMap
-    var ncpus = 0
-    val stats = new ArrayBuffer[Stats]
-    var fieldIndex = Iterator from 0
+    var index: Option[Int] = None
+    val stats = new MutStatMap {
+      override def default(k: Int) = {
+        val buf = new ArrayBuffer[FieldMap]
+        this(k) = buf
+        buf
+      }
+    }
+    val names = new HashSet[String]
 
     // Parse logs into Stats.
     for (line <- lines) {
-      if (line contains " cpus") {
-        // Include sum.
-        if (ncpus != 0) {
-          map("sum") = sum(map.values) - map("actressLoading") - map("movieLoading")
-          stats += Stats(ncpus, map.clone)
+      if (line contains indexer) {
+        if (index != None) {
+          stats(index.get) += map.clone
         }
-        ncpus = line.split(" ")(1).toInt
-        fieldIndex = Iterator from 0
+        index = Some(line.split(" ")(1).toInt)
         map.clear
-      } else if (line contains "main time: ") {
-        map(fieldName(fieldIndex.next)) = line.split(" ").last.toDouble / 1000.0
+      } else {
+        val Seq(name, value) = line split ": "
+        names += name
+        map(name) = value.toDouble / 1000.0
       }
     }
 
-    // Build actual plot data.
-    val plotData = new HashMap[String,ArrayBuffer[Double]] {
-      override def default(k: String) = {
-        val buf = new ArrayBuffer[Double]
-        this(k) = buf
-        buf
+    // Build plot data.
+    val plotData = for (name <- names) yield {
+      val points = for ((index, buf) <- stats) yield {
+        val values = for (map <- buf) yield map(name)
+        (index, mean(values))
       }
+      val array = points.toStream.toArray
+      Sorting quickSort array
+      (name, array)
     }
 
-    val ncpuList = stats map (_.ncpus)
-    for (Stats(ncpus, map) <- stats) {
-      for (field <- map.keys) {
-        plotData(field) += map(field)
-      }
-    }
-    // Produce the time and speedup .dats.
-    for ((field,times) <- plotData) {
-      val baseline = times(0).toDouble
-      println(field + ": " + times)
-      using (TextWriter("data" / camelToHyphen(field) + "-time.dat")) { w =>
-        for ((time,ncpus) <- times zip ncpuList) {
-          w.println(ncpus + " " + time)
+    // Write plot data.
+    for ((name, points) <- plotData) {
+      println(name + ": " + points.mkString(", "))
+      using (TextWriter("data" / spacedToHyphen(name) + "-time.dat")) { w =>
+        for ((index, value) <- points) {
+          w println (index + " " + value)
         }
       }
-      using (TextWriter("data" / camelToHyphen(field) + "-speedup.dat")) { w =>
-        for ((time,ncpus) <- times map (baseline / _) zip ncpuList) {
-          w.println(ncpus + " " + time)
+      val baseline = points(0)._2
+      using (TextWriter("data" / spacedToHyphen(name) + "-speedup.dat")) { w =>
+        for ((index, value) <- points) {
+          w println (index + " " + baseline / value)
         }
       }
     }
@@ -81,8 +69,8 @@
     // Instruct gnuplot.
     def f(s:String) = {
       {
-        for ((field,_) <- plotData) yield (
-          "'data/" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'"
+        for (name <- names) yield (
+          "'data/" + spacedToHyphen(name) + s + ".dat' with linespoints title '" + name + "'"
         )
       } mkString ", "
     }

Modified: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile	2008-02-11 17:03:04 UTC (rev 376)
+++ hash-join/trunk/tools/Makefile	2008-02-11 17:03:34 UTC (rev 377)
@@ -23,7 +23,9 @@
 
 proc: out/LogProc.class
 	mkdir -p data
-	scala -cp out LogProc $(log)
+	egrep 'cpus|time: ' $(log) | \
+		sed 's/ time: /: /' | \
+		scala -cp out LogProc " cpus"
 
 titles: out/Titles.class
 	cat ../src/movies.dat | tr '\0' '\n' | scala -cp out Titles > titles.txt
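For context, a rough sketch of how the reworked proc pipeline could be exercised by hand. Only the egrep/sed/scala command itself comes from the Makefile change above; the file name run.log and the raw log lines are invented for illustration, since the actual log format is not part of this commit:

    # Hypothetical log excerpt (format assumed, not taken from this commit).
    # The new LogProc reads its records from stdin and takes the "indexer"
    # substring (" cpus") as its only argument:
    #
    #   using 2 cpus
    #   movie loading time: 5214
    #   actress probing time: 1187
    #   using 4 cpus
    #   movie loading time: 2730
    #   actress probing time: 641
    #
    # egrep keeps only the "cpus" and "time: " lines, sed rewrites
    # "<name> time: <ms>" into "<name>: <ms>", and LogProc then groups the
    # millisecond values under the cpu count it finds in the second
    # whitespace-separated field of each " cpus" line:
    egrep 'cpus|time: ' run.log | \
        sed 's/ time: /: /' | \
        scala -cp out LogProc " cpus"
    # This should leave per-field averages in data/<name>-time.dat and
    # data/<name>-speedup.dat (e.g. data/movie-loading-time.dat), the files
    # referenced by the gnuplot fragments the f(s) helper above assembles.

Compared with the old version, nothing about the field names or their order is hard-coded anymore: any "<name>: <value>" record grouped by any indexer line should flow through to its own pair of .dat files.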