[Assorted-commits] SF.net SVN: assorted: [369] hash-join/trunk/tools/DbPrep.scala
Brought to you by:
yangzhang
From: <yan...@us...> - 2008-02-11 05:18:42
|
Revision: 369
          http://assorted.svn.sourceforge.net/assorted/?rev=369&view=rev
Author:   yangzhang
Date:     2008-02-10 21:18:47 -0800 (Sun, 10 Feb 2008)

Log Message:
-----------
DbPrep can generate larger data sets by repeating n times and using rot encoding

Modified Paths:
--------------
    hash-join/trunk/tools/DbPrep.scala

Modified: hash-join/trunk/tools/DbPrep.scala
===================================================================
--- hash-join/trunk/tools/DbPrep.scala 2008-02-11 05:17:48 UTC (rev 368)
+++ hash-join/trunk/tools/DbPrep.scala 2008-02-11 05:18:47 UTC (rev 369)
@@ -1,3 +1,4 @@
+import commons.Collections._
 import commons.Control._
 import commons.Io._
 import java.util.regex._
@@ -15,59 +16,63 @@
     val pMovie = Pattern compile """^([^\t]+)\t+(.*)$"""
     val pActress = Pattern compile """^([^\t]+)\t+([^\t]+)$"""
     val (doMovies, doActresses) = (true, true)
-    if (doMovies) {
-      using (TextReader("movies.list")) { r =>
-        using (TextWriter("movies.dat")) { w =>
-          var line = r.readLine
-          try {
-            var body = false
-            while (line != null) {
-              if (body && (line contains "----------------")) {
-                body = false
+    val nreps = args(0).toInt
+    using (TextWriter("movies.dat")) { wm =>
+      using (TextWriter("actresses.dat")) { wa =>
+        for (i <- 0 until nreps) {
+          def xform(s: String) = if (i == 0) s else rot(i, s)
+          if (doMovies) {
+            using (TextReader("movies.list")) { r =>
+              var line = r.readLine
+              try {
+                var body = false
+                while (line != null) {
+                  if (body && (line contains "----------------")) {
+                    body = false
+                  }
+                  if (body && line != "") {
+                    val (title, release) = extract(pMovie, line)
+                    wm print (xform(title) + "\0" + release + "\0\0")
+                  }
+                  if (!body && (line contains "=======")) {
+                    body = true
+                  }
+                  line = r.readLine
+                }
+              } catch {
+                case e: Exception => { Console.err.println(line); throw e }
               }
-              if (body && line != "") {
-                val (title, release) = extract(pMovie, line)
-                w print (title + "\0" + release + "\0\0")
-              }
-              if (!body && (line contains "=======")) {
-                body = true
-              }
-              line = r.readLine
             }
-          } catch {
-            case e: Exception => { Console.err.println(line); throw e }
           }
-        }
-      }
-    }
-    if (doActresses) {
-      using (TextReader("actresses.list")) { r =>
-        using (TextWriter("actresses.dat")) { w =>
-          var line = r.readLine
-          try {
-            var body = false
-            while (line != null) {
-              if (body && (line contains "----------------")) {
-                body = false
-              }
-              if (body && line != "") {
-                val (actress, title) = extract(pActress, line)
-                w print (actress + "\0" + cleanTitle(title) + "\0")
-                while (line != "") {
-                  line = r.readLine.trim
-                  if (line != "") {
-                    w print (cleanTitle(title) + "\0")
+          if (doActresses) {
+            using (TextReader("actresses.list")) { r =>
+              var line = r.readLine
+              try {
+                var body = false
+                while (line != null) {
+                  if (body && (line contains "----------------")) {
+                    body = false
                   }
+                  if (body && line != "") {
+                    val (actress, title) = extract(pActress, line)
+                    wa print (actress + "\0" + cleanTitle(xform(title)) + "\0")
+                    while (line != "") {
+                      line = r.readLine.trim
+                      if (line != "") {
+                        wa print (cleanTitle(xform(title)) + "\0")
+                      }
+                    }
+                    wa print "\0"
+                  }
+                  if (!body && ((line contains "\t") && (line startsWith "----") && (line endsWith "----"))) {
+                    body = true
+                  }
+                  line = r.readLine
                 }
-                w print "\0"
+              } catch {
+                case e: Exception => { Console.err.println(line); throw e }
               }
-              if (!body && ((line contains "\t") && (line startsWith "----") && (line endsWith "----"))) {
-                body = true
-              }
-              line = r.readLine
             }
-          } catch {
-            case e: Exception => { Console.err.println(line); throw e }
           }
         }
       }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|