[Assorted-commits] SF.net SVN: assorted: [416] hash-join/trunk/tools
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-15 01:42:02
|
Revision: 416
http://assorted.svn.sourceforge.net/assorted/?rev=416&view=rev
Author: yangzhang
Date: 2008-02-14 17:42:05 -0800 (Thu, 14 Feb 2008)

Log Message:
-----------
first steps to reducing cpu and using int ids

Modified Paths:
--------------
hash-join/trunk/tools/DbPrep.scala
hash-join/trunk/tools/LogProc.scala

Modified: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala 2008-02-15 01:40:15 UTC (rev 415)
+++ hash-join/trunk/tools/DbPrep.scala 2008-02-15 01:42:05 UTC (rev 416)
@@ -17,6 +17,8 @@
 val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$"""
 val (doMovies, doActresses) = (true, true)
 val nreps = args(0).toInt
+ val title2id = new IdMapper[String]
+ def titleId(s: String) = serializeInt(title2id(s))
 using (TextWriter("movies.dat")) { wm =>
 using (TextWriter("actresses.dat")) { wa =>
 for (i <- 0 until nreps) {
@@ -33,6 +35,7 @@
 if (body && line != "") {
 val (title, release) = extract(pMovie, line)
 wm print (xform(title) + "\0" + release + "\0\0")
+ // wm print (titleId(title) + xform(title) + "\0" + release + "\0\0")
 }
 if (!body && (line contains "=======")) {
 body = true

Modified: hash-join/trunk/tools/LogProc.scala
===================================================================
--- hash-join/trunk/tools/LogProc.scala 2008-02-15 01:40:15 UTC (rev 415)
+++ hash-join/trunk/tools/LogProc.scala 2008-02-15 01:42:05 UTC (rev 416)
@@ -10,6 +10,7 @@
 type FieldMap = Map[String,Double]
 type MutFieldMap = HashMap[String,Double]
 type MutStatMap = HashMap[Int,ArrayBuffer[FieldMap]]
+ def dropPrefix(s: String, t: String) = if (t startsWith s) t drop (s.length) mkString else t
 def main(args: Array[String]) {
 val indexer = args(0)
 val lines = using (TextReader(Console.in)) (_.readLines.toArray)

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|