Thread: [Assorted-commits] SF.net SVN: assorted: [334] hash-join/trunk/tools
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-07 01:35:34
|
Revision: 334
http://assorted.svn.sourceforge.net/assorted/?rev=334&view=rev
Author: yangzhang
Date: 2008-02-06 17:35:37 -0800 (Wed, 06 Feb 2008)

Log Message:
-----------
fixed graphs

Modified Paths:
--------------
hash-join/trunk/tools/LogProc.scala
hash-join/trunk/tools/Makefile

Modified: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala 2008-02-07 00:50:59 UTC (rev 333)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-07 01:35:37 UTC (rev 334)
@@ -77,7 +77,7 @@
 // Instruct gnuplot.
 def f(s:String) = {
 {
- for ((field,_) <- map) yield (
+ for ((field,_) <- plotData) yield (
 "'" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'"
 )
 } mkString ", "

Modified: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile 2008-02-07 00:50:59 UTC (rev 333)
+++ hash-join/trunk/tools/Makefile 2008-02-07 01:35:37 UTC (rev 334)
@@ -16,7 +16,7 @@
 scala -cp out DbPrep
 proc: out/LogProc.class
- scala -cp out LogProc log
+ scala -cp out LogProc log-opt
 clean:
 rm -rf out

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-11 17:03:31
|
Revision: 377
http://assorted.svn.sourceforge.net/assorted/?rev=377&view=rev
Author: yangzhang
Date: 2008-02-11 09:03:34 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
moved to simpler, more generic log processor

Modified Paths:
--------------
hash-join/trunk/tools/LogProc.scala
hash-join/trunk/tools/Makefile

Modified: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala 2008-02-11 17:03:04 UTC (rev 376)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-11 17:03:34 UTC (rev 377)
@@ -3,77 +3,65 @@
 import commons.Io._
 import commons.Debug._
 import commons.Path._
-// import commons.Plotting._
 import scala.collection.mutable._
+import scala.util._
 object LogProc {
 type FieldMap = Map[String,Double]
 type MutFieldMap = HashMap[String,Double]
- case class Stats(
- ncpus: Int,
- values: FieldMap
- )
- val descriptors = Array(
- ("movieLoading", "loading movies" ),
- ("actressLoading", "loading actresses" ),
- ("moviePartitioning", "hash-partitioning movies" ),
- ("actressPartitioning", "hash-partitioning actresses" ),
- ("movieBuilding", "building with movies" ),
- ("actressProbing", "probing with actresses" ),
- ("sum", "sum" )
- )
- val fieldNameToLabel = Map(descriptors: _*)
- def fieldName(k: Int) = descriptors(k)._1
+ type MutStatMap = HashMap[Int,ArrayBuffer[FieldMap]]
 def main(args: Array[String]) {
- val lines = using (TextReader(args(0))) (_.readLines.toArray)
+ val indexer = args(0)
+ val lines = using (TextReader(Console.in)) (_.readLines.toArray)
 val map = new MutFieldMap
- var ncpus = 0
- val stats = new ArrayBuffer[Stats]
- var fieldIndex = Iterator from 0
+ var index: Option[Int] = None
+ val stats = new MutStatMap {
+ override def default(k: Int) = {
+ val buf = new ArrayBuffer[FieldMap]
+ this(k) = buf
+ buf
+ }
+ }
+ val names = new HashSet[String]
 // Parse logs into Stats.
 for (line <- lines) {
- if (line contains " cpus") {
- // Include sum.
- if (ncpus != 0) {
- map("sum") = sum(map.values) - map("actressLoading") - map("movieLoading")
- stats += Stats(ncpus, map.clone)
+ if (line contains indexer) {
+ if (index != None) {
+ stats(index.get) += map.clone
 }
- ncpus = line.split(" ")(1).toInt
- fieldIndex = Iterator from 0
+ index = Some(line.split(" ")(1).toInt)
 map.clear
- } else if (line contains "main time: ") {
- map(fieldName(fieldIndex.next)) = line.split(" ").last.toDouble / 1000.0
+ } else {
+ val Seq(name, value) = line split ": "
+ names += name
+ map(name) = value.toDouble / 1000.0
 }
 }
- // Build actual plot data.
- val plotData = new HashMap[String,ArrayBuffer[Double]] {
- override def default(k: String) = {
- val buf = new ArrayBuffer[Double]
- this(k) = buf
- buf
+ // Build plot data.
+ val plotData = for (name <- names) yield {
+ val points = for ((index, buf) <- stats) yield {
+ val values = for (map <- buf) yield map(name)
+ (index, mean(values))
 }
+ val array = points.toStream.toArray
+ Sorting quickSort array
+ (name, array)
 }
- val ncpuList = stats map (_.ncpus)
- for (Stats(ncpus, map) <- stats) {
- for (field <- map.keys) {
- plotData(field) += map(field)
- }
- }
- // Produce the time and speedup .dats.
- for ((field,times) <- plotData) {
- val baseline = times(0).toDouble
- println(field + ": " + times)
- using (TextWriter("data" / camelToHyphen(field) + "-time.dat")) { w =>
- for ((time,ncpus) <- times zip ncpuList) {
- w.println(ncpus + " " + time)
+ // Write plot data.
+ for ((name, points) <- plotData) {
+ println(name + ": " + points.mkString(", "))
+ using (TextWriter("data" / spacedToHyphen(name) + "-time.dat")) { w =>
+ for ((index, value) <- points) {
+ w println (index + " " + value)
 }
 }
- using (TextWriter("data" / camelToHyphen(field) + "-speedup.dat")) { w =>
- for ((time,ncpus) <- times map (baseline / _) zip ncpuList) {
- w.println(ncpus + " " + time)
+ val baseline = points(0)._2
+ using (TextWriter("data" / spacedToHyphen(name) + "-speedup.dat")) { w =>
+ for ((index, value) <- points) {
+ w println (index + " " + baseline / value)
 }
 }
 }
@@ -81,8 +69,8 @@
 // Instruct gnuplot.
 def f(s:String) = {
 {
- for ((field,_) <- plotData) yield (
- "'data/" + camelToHyphen(field) + s + ".dat" + "' with linespoints title '" + fieldNameToLabel(field) + "'"
+ for (name <- names) yield (
+ "'data/" + spacedToHyphen(name) + s + ".dat' with linespoints title '" + name + "'"
 )
 } mkString ", "
 }

Modified: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile 2008-02-11 17:03:04 UTC (rev 376)
+++ hash-join/trunk/tools/Makefile 2008-02-11 17:03:34 UTC (rev 377)
@@ -23,7 +23,9 @@
 proc: out/LogProc.class
 mkdir -p data
- scala -cp out LogProc $(log)
+ egrep 'cpus|time: ' $(log) | \
+ sed 's/ time: /: /' | \
+ scala -cp out LogProc " cpus"
 titles: out/Titles.class
 cat ../src/movies.dat | tr '\0' '\n' | scala -cp out Titles > titles.txt

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-11 23:11:38
|
Revision: 378
http://assorted.svn.sourceforge.net/assorted/?rev=378&view=rev
Author: yangzhang
Date: 2008-02-11 15:11:38 -0800 (Mon, 11 Feb 2008)

Log Message:
-----------
tweaks

Modified Paths:
--------------
hash-join/trunk/tools/DbPrep.scala
hash-join/trunk/tools/Titles.scala

Modified: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala 2008-02-11 17:03:34 UTC (rev 377)
+++ hash-join/trunk/tools/DbPrep.scala 2008-02-11 23:11:38 UTC (rev 378)
@@ -10,7 +10,7 @@
 }
 def cleanTitle(line: String) = {
 val t = line indexOf " "
- if (t > 0) line take t else line
+ if (t > 0) line take t mkString else line
 }
 def main(args: Array[String]) {
 val pMovie = Pattern compile """^([^\t]+)\t+(.*)$"""

Modified: hash-join/trunk/tools/Titles.scala
===================================================================
--- hash-join/trunk/tools/Titles.scala 2008-02-11 17:03:34 UTC (rev 377)
+++ hash-join/trunk/tools/Titles.scala 2008-02-11 23:11:38 UTC (rev 378)
@@ -3,7 +3,7 @@
 def main(args: Array[String]) {
 var newPar = true
 for (line <- untilNull(Console.readLine)) {
- if (newPar) {
+ if (!(line isEmpty) && newPar) {
 println(line)
 newPar = false
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-15 01:42:02
|
Revision: 416
http://assorted.svn.sourceforge.net/assorted/?rev=416&view=rev
Author: yangzhang
Date: 2008-02-14 17:42:05 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
first steps to reducing cpu and using int ids

Modified Paths:
--------------
hash-join/trunk/tools/DbPrep.scala
hash-join/trunk/tools/LogProc.scala

Modified: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala 2008-02-15 01:40:15 UTC (rev 415)
+++ hash-join/trunk/tools/DbPrep.scala 2008-02-15 01:42:05 UTC (rev 416)
@@ -17,6 +17,8 @@
 val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$"""
 val (doMovies, doActresses) = (true, true)
 val nreps = args(0).toInt
+ val title2id = new IdMapper[String]
+ def titleId(s: String) = serializeInt(title2id(s))
 using (TextWriter("movies.dat")) { wm =>
 using (TextWriter("actresses.dat")) { wa =>
 for (i <- 0 until nreps) {
@@ -33,6 +35,7 @@
 if (body && line != "") {
 val (title, release) = extract(pMovie, line)
 wm print (xform(title) + "\0" + release + "\0\0")
+ // wm print (titleId(title) + xform(title) + "\0" + release + "\0\0")
 }
 if (!body && (line contains "=======")) {
 body = true

Modified: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala 2008-02-15 01:40:15 UTC (rev 415)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-15 01:42:05 UTC (rev 416)
@@ -10,6 +10,7 @@
 type FieldMap = Map[String,Double]
 type MutFieldMap = HashMap[String,Double]
 type MutStatMap = HashMap[Int,ArrayBuffer[FieldMap]]
+ def dropPrefix(s: String, t: String) = if (t startsWith s) t drop (s.length) mkString else t
 def main(args: Array[String]) {
 val indexer = args(0)
 val lines = using (TextReader(Console.in)) (_.readLines.toArray)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <yan...@us...> - 2008-02-16 21:38:55
|
Revision: 457
http://assorted.svn.sourceforge.net/assorted/?rev=457&view=rev
Author: yangzhang
Date: 2008-02-16 13:38:54 -0800 (Sat, 16 Feb 2008)

Log Message:
-----------
moved scala tools to simple-build

Added Paths:
-----------
hash-join/trunk/tools/build
hash-join/trunk/tools/run.bash

Removed Paths:
-------------
hash-join/trunk/tools/Makefile

Deleted: hash-join/trunk/tools/Makefile
===================================================================
--- hash-join/trunk/tools/Makefile 2008-02-16 21:37:39 UTC (rev 456)
+++ hash-join/trunk/tools/Makefile 2008-02-16 21:38:54 UTC (rev 457)
@@ -1,37 +0,0 @@
-COMMONS_SRCS := $(wildcard commons/*.scala)
-DBPREP_SRCS := DbPrep.scala $(COMMONS_SRCS)
-LOGPROC_SRCS := LogProc.scala $(COMMONS_SRCS)
-TITLES_SRCS := Titles.scala $(COMMONS_SRCS)
-log := log-josmp
-rep := 1
-
-FSC = mkdir -p out && fsc -deprecation -d out $^
-
-all: out/DbPrep.class out/LogProc.class
-
-out/DbPrep.class: $(DBPREP_SRCS)
- $(FSC)
-
-out/LogProc.class: $(LOGPROC_SRCS)
- $(FSC)
-
-out/Titles.class: $(TITLES_SRCS)
- $(FSC)
-
-prep: out/DbPrep.class
- scala -cp out DbPrep $(rep)
-
-proc: out/LogProc.class
- mkdir -p data
- egrep 'cpus|time: ' $(log) | \
- sed 's/ time: /: /' | \
- scala -cp out LogProc " cpus"
-
-titles: out/Titles.class
- cat ../src/movies.dat | tr '\0' '\n' | scala -cp out Titles > titles.txt
- # head -c 1024 movies.dat | tr '\0' '\n' | scala -cp out Titles > titles.txt
-
-clean:
- rm -rf out
-
-.PHONY: clean run prep proc

Added: hash-join/trunk/tools/build
===================================================================
--- hash-join/trunk/tools/build (rev 0)
+++ hash-join/trunk/tools/build 2008-02-16 21:38:54 UTC (rev 457)
@@ -0,0 +1,11 @@
+prep:
+ mainclass: DbPrep
+ srcs: [DbPrep.scala]
+
+proc:
+ mainclass: LogProc
+ srcs: [LogProc.scala]
+
+titles:
+ mainclass: Titles
+ srcs: [Titles.scala]

Added: hash-join/trunk/tools/run.bash
===================================================================
--- hash-join/trunk/tools/run.bash (rev 0)
+++ hash-join/trunk/tools/run.bash 2008-02-16 21:38:54 UTC (rev 457)
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+set -o errexit -o nounset
+
+make -s
+
+prep() {
+ out/prep "${@:-10}"
+}
+
+proc() {
+ mkdir -p data
+
+ egrep 'cpus|time: ' $1 |
+ sed 's/ time: /: /' |
+ out/proc ' cpus'
+}
+
+titles() {
+ cat ../src/movies.dat |
+ tr '\0' '\n' |
+ out/titles > titles.txt
+}
+
+"$@"

Property changes on: hash-join/trunk/tools/run.bash
___________________________________________________________________
Name: svn:executable
 + *

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |