From: <jle...@us...> - 2007-09-20 19:01:15
|
Revision: 1999 http://archive-access.svn.sourceforge.net/archive-access/?rev=1999&view=rev Author: jlee-archive Date: 2007-09-20 12:01:12 -0700 (Thu, 20 Sep 2007) Log Message: ----------- Temporarily commented out CDX generation during import phase since the underlying code in org.archive.wayback is in flux. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java Modified: trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java =================================================================== --- trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java 2007-09-20 18:30:19 UTC (rev 1998) +++ trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java 2007-09-20 19:01:12 UTC (rev 1999) @@ -90,7 +90,7 @@ import org.archive.util.Base32; import org.archive.util.MimetypeUtils; import org.archive.util.TextUtils; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; +//import org.archive.wayback.resourceindex.indexer.ArcIndexer; /** * Ingests ARCs writing ARC Record parse as Nutch FetcherOutputFormat. @@ -204,6 +204,8 @@ job.setInputPath(arcUrlsDir); + ARCMapRunner.test(); + job.setMapRunnerClass(job.getClass("wax.import.maprunner", ARCMapRunner.class)); job.setMapperClass(job.getClass("wax.import.mapper", this.getClass())); @@ -466,6 +468,9 @@ mw.put(new Text(ImportArcs.ARCFILENAME_KEY), new Text(arcName)); mw.put(new Text(ImportArcs.ARCFILEOFFSET_KEY), new Text(Long.toString(arcData.getOffset()))); + +/* XXX commented out while Wayback is refactored + String cdxLine = null; try @@ -479,6 +484,9 @@ } mw.put(CDXKEY, new Text(cdxLine)); + +*/ + datum.setMetaData(mw); Parse parse = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |