Revision: 1933
          http://archive-access.svn.sourceforge.net/archive-access/?rev=1933&view=rev
Author:   bradtofel
Date:     2007-08-23 15:52:33 -0700 (Thu, 23 Aug 2007)

Log Message:
-----------
REFACTOR: now uses the various CDX-SearchResult-BDB adapters.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java

Modified: trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java	2007-08-23 22:51:18 UTC (rev 1932)
+++ trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java	2007-08-23 22:52:33 UTC (rev 1933)
@@ -26,7 +26,10 @@
 import org.archive.io.arc.ARCRecord;
 import org.archive.mapred.ARCMapRunner;
 import org.archive.mapred.ARCRecordMapper;
-import org.archive.wayback.resourceindex.indexer.ArcIndexer;
+import org.archive.wayback.core.SearchResult;
+import org.archive.wayback.resourcestore.ArcIndexer;
+import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter;
+import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter;
 
 /**
  * Hadoop Driver for generation of alphabetically partitioned Wayback CDX
@@ -48,18 +51,24 @@
         private Text outKey = new Text();
         private Text outValue = new Text("");
 
+        private ARCRecordToSearchResultAdapter ARtoSR =
+            new ARCRecordToSearchResultAdapter();
+        private SearchResultToCDXLineAdapter SRtoCDX =
+            new SearchResultToCDXLineAdapter();
+
         public void map(WritableComparable key, Writable value,
                 OutputCollector output, Reporter reporter) throws IOException {
             ObjectWritable ow = (ObjectWritable) value;
             ARCRecord rec = (ARCRecord) ow.get();
             String line;
-            try {
-                line = ArcIndexer.arcRecordToCDXLine(rec);
+            SearchResult result = ARtoSR.adapt(rec);
+            if(result != null) {
+                line = SRtoCDX.adapt(result);
+                if(line != null) {
-                outKey.set(line);
-                output.collect(outKey, outValue);
-            } catch (ParseException e) {
-                e.printStackTrace();
+                    outKey.set(line);
+                    output.collect(outKey, outValue);
+                }
             }
         }
         public void onARCOpen() throws IOException {}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|