From: <sta...@us...> - 2007-01-22 21:20:26
|
Revision: 1450 http://archive-access.svn.sourceforge.net/archive-access/?rev=1450&view=rev Author: stack-sf Date: 2007-01-22 13:15:40 -0800 (Mon, 22 Jan 2007) Log Message: ----------- M projects/nutch/project.xml _M projects/nutch/src/images/nutchwax.jpg _M projects/nutch/src/images/nwa.jpg _M projects/nutch/src/images/iipc.gif D projects/nutch/bin/indexArcsLogReporter.py M projects/wayback/project.xml M projects/waxtoolbar/xdocs/downloads.xml M projects/waxtoolbar/project.xml M projects/nutch-trec/xdocs/index.xml M projects/nutch-trec/project.xml M projects/nutch-trec/README.txt M projects/wera/xdocs/downloads.xml M projects/wera/project.xml Purge more of the cvs references. Modified Paths: -------------- trunk/archive-access/projects/nutch/project.xml trunk/archive-access/projects/nutch-trec/README.txt trunk/archive-access/projects/nutch-trec/project.xml trunk/archive-access/projects/nutch-trec/xdocs/index.xml trunk/archive-access/projects/waxtoolbar/project.xml trunk/archive-access/projects/waxtoolbar/xdocs/downloads.xml trunk/archive-access/projects/wayback/project.xml trunk/archive-access/projects/wera/project.xml trunk/archive-access/projects/wera/xdocs/downloads.xml Removed Paths: ------------- trunk/archive-access/projects/nutch/bin/indexArcsLogReporter.py Property Changed: ---------------- trunk/archive-access/projects/nutch/src/images/iipc.gif trunk/archive-access/projects/nutch/src/images/nutchwax.jpg trunk/archive-access/projects/nutch/src/images/nwa.jpg Deleted: trunk/archive-access/projects/nutch/bin/indexArcsLogReporter.py =================================================================== --- trunk/archive-access/projects/nutch/bin/indexArcsLogReporter.py 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/nutch/bin/indexArcsLogReporter.py 2007-01-22 21:15:40 UTC (rev 1450) @@ -1,181 +0,0 @@ -#!/usr/bin/env python -# -# Feed indexArcs output to this script and it will summarize the content. -# -# $Id$ -# -'''Usage: %s [--help] [--debug] FILE1 FILE2... -Version: %s -Options: - -h, --help Print this usage message. - -d, --debug Enable debugging. - -c, --csv Output reports as cvs. -Run this script against output of indexArcs script. -''' -__author__ = "Michael Stack <stack at archive dot org>" -__date__ = "Wed Jan 25 11:34:16 PST 2006" -__version__ = "0.1.0" - - -import sys -import string -import time -import logging -import util - - -# Setup logging. -logging.basicConfig() -logger = logging.getLogger(sys.argv[0]) -logger.setLevel(logging.INFO) - - -class IndexArcsParser: - def __init__(self, fd): - self.fd = fd - - def readline(self): - line = self.fd.readline() - logger.debug(line) - return line - - def findStartTime(self, str2Find): - startTime = None - while 1: - line = self.readline() - if not line: - raise IOError, "Failed find of start ('%s') line" % str2Find - if line.find(str2Find) > 0: - startTime = makeTime(line.split()[0], line.split()[1]) - logger.debug('startTime %d' % int(startTime)) - break - return startTime - - def findEndTime(self, str2Find): - startReduceTime = None - while 1: - line = self.fd.readline() - if not line: - raise IOError, "Failed find of end ('%s') line" % str2Find - if not startReduceTime: - if line.find(' reduce ') > 0: - startReduceTime = makeTime(line.split()[0], line.split()[1]) - logger.debug('startReduceTime %d' % int(startReduceTime)) - continue - if line.find(str2Find) > 0: - logger.debug("Found in : " + line) - endTime = makeTime(line.split()[0], line.split()[1]) - logger.debug('endTime %d' % int(endTime)) - break - return startReduceTime, endTime - -def processLog(fd): - '''Expect particular format. Fail if its otherwise.''' - parser = IndexArcsParser(fd) - startTime, x = processImportArcs(parser) - updateCrawlDbEndTime = processUpdateCrawlDb(parser) - processInvertLinks(parser) - processIndexer(parser) - processDedup(parser) - x, endTime = processMerge(parser) - if globals().has_key("csv"): - logger.info("Total, %d" % int(endTime - startTime)) - else: - logger.info("Total process took %d" % int(endTime - startTime)) - -def makeTime(d, t): - return time.mktime(time.strptime("%s%s" % (d, t), "%y%m%d%H%M%S")) - -def processImportArcs(parser): - startTime = parser.findStartTime('importing arcs') - firstReduceTime, endTime = parser.findEndTime(" ImportArcs: done") - writeReport("ImportArcs", startTime, endTime, firstReduceTime) - return startTime, endTime - -def processUpdateCrawlDb(parser): - startTime = parser.findStartTime('updating crawldb') - firstReduceTime, endTime = parser.findEndTime("CrawlDb update: done") - writeReport("UpdateCrawlDb", startTime, endTime, firstReduceTime) - return startTime, endTime - -def processInvertLinks(parser): - startTime = parser.findStartTime(' inverting links ') - firstReduceTime, endTime = parser.findEndTime("LinkDb: done") - writeReport("InvertLinks", startTime, endTime, firstReduceTime) - return startTime, endTime - -def processIndexer(parser): - startTime = parser.findStartTime(' Indexer: linkdb: ') - firstReduceTime, endTime = parser.findEndTime("Indexer: done") - writeReport("Indexer", startTime, endTime, 0) - return startTime, endTime - -def processDedup(parser): - startTime = parser.findStartTime(' Dedup: starting') - firstReduceTime, endTime = parser.findEndTime("Dedup: done") - writeReport("Dedup", startTime, endTime, firstReduceTime) - return startTime, endTime - -def processMerge(parser): - startTime = parser.findStartTime(' index merge ') - firstReduceTime, endTime = parser.findEndTime(" Nutchwax finished") - writeReport("Merge", startTime, endTime, firstReduceTime) - return startTime, endTime - -def writeReport(task, startTime, endTime, firstReduceTime): - '''Write out report on import arcs mapreduce task.''' - if firstReduceTime: - mapTime = int(firstReduceTime - startTime) - reduceTime = int(endTime - firstReduceTime) - else: - mapTime = 0 - reduceTime = 0 - totalTime = int(endTime - startTime) - if globals().has_key("csv"): - logger.info("%s, %d, %d, %d" % - (task, totalTime, mapTime, reduceTime)) - else: - logger.info("%s took %d seconds, map %d %d%%, reduce %d %d%%" % - (task, totalTime, mapTime, util.formatPercent(mapTime, totalTime), - reduceTime, util.formatPercent(reduceTime, totalTime))) - -def usage(exitCode = 0, msg = None): - '''Print usage.''' - if msg: - print msg - print __doc__ % (sys.argv[0], __version__) - sys.exit(exitCode) - -def main(args): - '''Main entry point.''' - if len(args) == 1: - processLog(sys.stdin) - else: - # Do opt processing. - import getopt - try: - opts, args = getopt.getopt(sys.argv[1:], "hdc", - ["help", "debug", "csv"]) - except getopt.GetoptError, e: - usage(1, e) - for key, value in opts: - if key in ('-d', '--debug'): - logger.setLevel(logging.DEBUG) - continue - if key in ('-h', '--help'): - usage(0) - continue - if key in ('-c', '--csv'): - globals()["csv"] = 1 - continue - else: - raise ValueError, "Unexpected option %s", key - for name in args: - fd = open(name) - try: - processLog(fd) - finally: - fd.close() - -if __name__ == "__main__": - main(sys.argv) Modified: trunk/archive-access/projects/nutch/project.xml =================================================================== --- trunk/archive-access/projects/nutch/project.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/nutch/project.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -53,8 +53,8 @@ the connection element has the form: scm:<system>:<system specific connection string> --> <repository> - <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/</connection> - <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/</url> + <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/nutch</connection> + <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/nutch</url> </repository> <!-- any mailing lists for the project --> @@ -72,7 +72,7 @@ </archive> </mailingList> <mailingList> - <name>SVN Commits</name> + <name>Commits</name> <subscribe> http://lists.sourceforge.net/lists/listinfo/archive-access-cvs </subscribe> Property changes on: trunk/archive-access/projects/nutch/src/images/iipc.gif ___________________________________________________________________ Name: svn:keywords - Author Date Id Revision Name: svn:eol-style - native Property changes on: trunk/archive-access/projects/nutch/src/images/nutchwax.jpg ___________________________________________________________________ Name: svn:keywords - Author Date Id Revision Name: svn:eol-style - native Property changes on: trunk/archive-access/projects/nutch/src/images/nwa.jpg ___________________________________________________________________ Name: svn:mime-type - application/octet-stream Modified: trunk/archive-access/projects/nutch-trec/README.txt =================================================================== --- trunk/archive-access/projects/nutch-trec/README.txt 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/nutch-trec/README.txt 2007-01-22 21:15:40 UTC (rev 1450) @@ -9,7 +9,7 @@ assumes the nutch sources are in nutch/. So if you have a checkout of the nutch subversion repository: - $ cd ${ARCHIVE_ACCESS_CVS}/projects/nutch-trec + $ cd ${ARCHIVE_ACCESS}/projects/nutch-trec $ ln -s ${NUTCH_SVN}/trunk nutch $ ant @@ -17,7 +17,7 @@ source .java files from the .jj javacc file you need a copy of JavaCC in JavaCC/, eg: - $ cd ${ARCHIVE_ACCESS_CVS}/projects/nutch-trec + $ cd ${ARCHIVE_ACCESS}/projects/nutch-trec $ ln -s ${JAVACC_HOME} JavaCC $ ant javacc Modified: trunk/archive-access/projects/nutch-trec/project.xml =================================================================== --- trunk/archive-access/projects/nutch-trec/project.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/nutch-trec/project.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -63,11 +63,11 @@ <!-- the version control repository and http url for online access the connection element has the form: scm:<system>:<system specific connection string> --> + <repository> - <connection>scm:cvs:pserver:ano...@ar...:/cvsroot/archive-access:archive-access/projects/nutch-trec</connection> - <url>http://archive-access.cvs.sourceforge.net/archive-access/archive-access/projects/nutch-trec/</url> + <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/nutch-trec</connection> + <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/nutch-trec</url> </repository> - <!-- any mailing lists for the project --> <mailingLists> <mailingList> @@ -83,7 +83,7 @@ </archive> </mailingList> <mailingList> - <name>CVS Commits</name> + <name>Commits</name> <subscribe> http://lists.sourceforge.net/lists/listinfo/archive-access-cvs </subscribe> Modified: trunk/archive-access/projects/nutch-trec/xdocs/index.xml =================================================================== --- trunk/archive-access/projects/nutch-trec/xdocs/index.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/nutch-trec/xdocs/index.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -17,7 +17,7 @@ assumes the nutch sources are in the subdirectory <code>nutch/</code>. So if you have a checkout of the nutch subversion repository: <pre> - $ cd ${ARCHIVE_ACCESS_CVS}/projects/nutch-trec + $ cd ${ARCHIVE_ACCESS}/projects/nutch-trec $ ln -s ${NUTCH_SVN}/trunk nutch $ ant</pre> </p> @@ -27,7 +27,7 @@ If you wish to rebuild the JavaCC generated sources from the .jj javacc file you need a copy of JavaCC in JavaCC/, eg: <pre> - $ cd ${ARCHIVE_ACCESS_CVS}/projects/nutch-trec + $ cd ${ARCHIVE_ACCESS}/projects/nutch-trec $ ln -s ${JAVACC_HOME} JavaCC $ ant javacc</pre> </p> Modified: trunk/archive-access/projects/waxtoolbar/project.xml =================================================================== --- trunk/archive-access/projects/waxtoolbar/project.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/waxtoolbar/project.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -45,9 +45,10 @@ <distributionSite>http://shell.sourceforge.net</distributionSite> <distributionDirectory>/home/users/s/st/${maven.username} </distributionDirectory> + <repository> - <connection>scm:cvs:pserver:ano...@cv...:/cvsroot/archive-access:archive-access</connection> - <url>http://cvs.sourceforge.net/viewcvs.py/archive-access/archive-access/projects/waxtoolbar</url> + <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/waxtoolbar</connection> + <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/waxtoolbar</url> </repository> <mailingLists> <mailingList> @@ -63,7 +64,7 @@ </archive> </mailingList> <mailingList> - <name>CVS Commits</name> + <name>Commits</name> <subscribe> http://lists.sourceforge.net/lists/listinfo/archive-access-cvs </subscribe> Modified: trunk/archive-access/projects/waxtoolbar/xdocs/downloads.xml =================================================================== --- trunk/archive-access/projects/waxtoolbar/xdocs/downloads.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/waxtoolbar/xdocs/downloads.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -19,7 +19,7 @@ <p>Here is a <a href="http://builds.archive.org:8080/cruisecontrol/buildresults/HEAD-archive-access">pointer</a> to our continuous build box. The latest builds can be found under the 'Build Artifacts' link. Be aware that - this distribution has been made from CVS HEAD and CVS HEAD builds are + this distribution has been made from HEAD and HEAD builds are not guaranteed stable. </p> </subsection> Modified: trunk/archive-access/projects/wayback/project.xml =================================================================== --- trunk/archive-access/projects/wayback/project.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/wayback/project.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -79,8 +79,8 @@ scm:<system>:<system specific connection string> --> <repository> - <connection>scm:cvs:pserver:ano...@ar...:/cvsroot/archive-access:archive-access</connection> - <url>http://archive-access.cvs.sourceforge.net/archive-access/archive-access/projects/wayback/</url> + <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/wayback</connection> + <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/wayback/</url> </repository> <versions /> @@ -100,7 +100,7 @@ </archive> </mailingList> <mailingList> - <name>CVS Commits</name> + <name>SVN Commits</name> <subscribe> http://lists.sourceforge.net/lists/listinfo/archive-access-cvs </subscribe> Modified: trunk/archive-access/projects/wera/project.xml =================================================================== --- trunk/archive-access/projects/wera/project.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/wera/project.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -65,8 +65,8 @@ the connection element has the form: scm:<system>:<system specific connection string> --> <repository> - <connection>scm:cvs:pserver:ano...@cv...:/cvsroot/archive-access:archive-access</connection> - <url>http://cvs.sourceforge.net/viewcvs.py/archive-access/archive-access/projects/wera</url> + <connection>scm:svn:https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/wera</connection> + <url>https://archive-access.svn.sourceforge.net/svnroot/archive-access/trunk/archive-access/projects/wera</url> </repository> <!-- any mailing lists for the project --> @@ -84,7 +84,7 @@ </archive> </mailingList> <mailingList> - <name>CVS Commits</name> + <name>Commits</name> <subscribe> http://lists.sourceforge.net/lists/listinfo/archive-access-cvs </subscribe> Modified: trunk/archive-access/projects/wera/xdocs/downloads.xml =================================================================== --- trunk/archive-access/projects/wera/xdocs/downloads.xml 2007-01-22 21:02:35 UTC (rev 1449) +++ trunk/archive-access/projects/wera/xdocs/downloads.xml 2007-01-22 21:15:40 UTC (rev 1450) @@ -18,7 +18,7 @@ <p>Here is a <a href="http://builds.archive.org:8080/cruisecontrol/buildresults/HEAD-archive-access">pointer</a> to our continuous build box. The latest builds can be found under the 'Build Artifacts' link. Be aware that - this distribution has been made from CVS HEAD and CVS HEAD builds are + this distribution has been made from HEAD and HEAD builds are not guaranteed stable. </p> </subsection> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |