Revision: 1720 http://archive-access.svn.sourceforge.net/archive-access/?rev=1720&view=rev Author: bradtofel Date: 2007-04-10 18:07:44 -0700 (Tue, 10 Apr 2007) Log Message: ----------- Interface: MapClass now implements ARCRecordMapper, so it can be used with current archive-mapred code. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java Modified: trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-04-10 23:39:13 UTC (rev 1719) +++ trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-04-11 01:07:44 UTC (rev 1720) @@ -25,6 +25,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.archive.io.arc.ARCRecord; import org.archive.mapred.ARCMapRunner; +import org.archive.mapred.ARCRecordMapper; import org.archive.wayback.resourceindex.indexer.ArcIndexer; /** @@ -43,7 +44,7 @@ * @author brad * @version $Date$, $Revision$ */ - public static class MapClass extends MapReduceBase implements Mapper { + public static class MapClass extends MapReduceBase implements ARCRecordMapper { private Text outKey = new Text(); private Text outValue = new Text(""); @@ -61,6 +62,8 @@ e.printStackTrace(); } } + public void onARCOpen() throws IOException {} + public void onARCClose() throws IOException {} } static void printUsage() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 1933 http://archive-access.svn.sourceforge.net/archive-access/?rev=1933&view=rev Author: bradtofel Date: 2007-08-23 15:52:33 -0700 (Thu, 23 Aug 2007) Log Message: ----------- REFACTOR: now uses the various CDX-SearchResult-BDB adapters. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java Modified: trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-08-23 22:51:18 UTC (rev 1932) +++ trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-08-23 22:52:33 UTC (rev 1933) @@ -26,7 +26,10 @@ import org.archive.io.arc.ARCRecord; import org.archive.mapred.ARCMapRunner; import org.archive.mapred.ARCRecordMapper; -import org.archive.wayback.resourceindex.indexer.ArcIndexer; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.resourcestore.ArcIndexer; +import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; +import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; /** * Hadoop Driver for generation of alphabetically partitioned Wayback CDX @@ -48,18 +51,24 @@ private Text outKey = new Text(); private Text outValue = new Text(""); + private ARCRecordToSearchResultAdapter ARtoSR = + new ARCRecordToSearchResultAdapter(); + private SearchResultToCDXLineAdapter SRtoCDX = + new SearchResultToCDXLineAdapter(); + public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException { ObjectWritable ow = (ObjectWritable) value; ARCRecord rec = (ARCRecord) ow.get(); String line; - try { - line = ArcIndexer.arcRecordToCDXLine(rec); + SearchResult result = ARtoSR.adapt(rec); + if(result != null) { + line = SRtoCDX.adapt(result); + if(line != null) { - outKey.set(line); - output.collect(outKey, outValue); - } catch (ParseException e) { - e.printStackTrace(); + outKey.set(line); + output.collect(outKey, outValue); + } } } public void onARCOpen() throws IOException {} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 1959 http://archive-access.svn.sourceforge.net/archive-access/?rev=1959&view=rev Author: bradtofel Date: 2007-08-27 14:35:51 -0700 (Mon, 27 Aug 2007) Log Message: ----------- TWEAK: removed unused imports. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java Modified: trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-08-27 21:35:03 UTC (rev 1958) +++ trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2007-08-27 21:35:51 UTC (rev 1959) @@ -2,7 +2,6 @@ import java.io.IOException; -import java.text.ParseException; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -17,7 +16,6 @@ import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; -import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextOutputFormat; @@ -27,7 +25,6 @@ import org.archive.mapred.ARCMapRunner; import org.archive.mapred.ARCRecordMapper; import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourcestore.ArcIndexer; import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
Revision: 2385 http://archive-access.svn.sourceforge.net/archive-access/?rev=2385&view=rev Author: bradtofel Date: 2008-07-01 17:16:07 -0700 (Tue, 01 Jul 2008) Log Message: ----------- REFACTOR: SearchResult => (Url|Capture)SearchResult Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java Modified: trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2008-07-02 00:15:22 UTC (rev 2384) +++ trunk/archive-access/projects/wayback/wayback-mapreduce-prereq/src/main/java/org/archive/wayback/resourceindex/indexer/hadoop/Driver.java 2008-07-02 00:16:07 UTC (rev 2385) @@ -24,8 +24,8 @@ import org.archive.io.arc.ARCRecord; import org.archive.mapred.ARCMapRunner; import org.archive.mapred.ARCRecordMapper; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.resourcestore.ARCRecordToSearchResultAdapter; +import org.archive.wayback.core.CaptureSearchResult; +import org.archive.wayback.resourcestore.indexer.ARCRecordToSearchResultAdapter; import org.archive.wayback.resourceindex.cdx.SearchResultToCDXLineAdapter; /** @@ -58,7 +58,7 @@ ObjectWritable ow = (ObjectWritable) value; ARCRecord rec = (ARCRecord) ow.get(); String line; - SearchResult result = ARtoSR.adapt(rec); + CaptureSearchResult result = ARtoSR.adapt(rec); if(result != null) { line = SRtoCDX.adapt(result); if(line != null) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |