From: <jle...@us...> - 2007-05-12 07:44:36
|
Revision: 1745 http://archive-access.svn.sourceforge.net/archive-access/?rev=1745&view=rev Author: jlee-archive Date: 2007-05-12 00:44:36 -0700 (Sat, 12 May 2007) Log Message: ----------- Added getArcName() and other small changes to allow subclasses of ImportArcs to specify a collectionName per ARC, not just per import job. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java Modified: trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java =================================================================== --- trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java 2007-05-03 06:48:32 UTC (rev 1744) +++ trunk/archive-access/projects/nutchwax/nutchwax-core/src/main/java/org/archive/access/nutch/jobs/ImportArcs.java 2007-05-12 07:44:36 UTC (rev 1745) @@ -202,7 +202,7 @@ job.setMapRunnerClass(job.getClass("wax.import.maprunner", ARCMapRunner.class)); - job.setMapperClass(job.getClass("wax.import.mapper", ImportArcs.class)); + job.setMapperClass(job.getClass("wax.import.mapper", this.getClass())); job.setInputFormat(TextInputFormat.class); @@ -275,7 +275,7 @@ public void onARCClose() { // Nothing to do. } - + public void map(final WritableComparable key, final Writable value, final OutputCollector output, final Reporter r) throws IOException { @@ -286,13 +286,13 @@ ARCReporter reporter = (ARCReporter)r; // Its null first time map is called on an ARC. - if (this.arcName == null) { - this.arcName = getARCName(rec.getMetaData()); + checkArcName(rec); + + if (!isIndex(rec)) + { + return; } - - if (!isIndex(rec)) { - return; - } + checkCollectionName(); final ARCRecordMetaData arcData = rec.getMetaData(); @@ -462,7 +462,22 @@ parse != null ? 
new ParseImpl(parse) : null); output.collect(Nutchwax.generateWaxKey(url, this.collectionName), v); } - + + public void setCollectionName(String collectionName) { + this.collectionName = collectionName; + checkCollectionName(); + } + + public String getArcName() { + return this.arcName; + } + + public void checkArcName(ARCRecord rec) { + if ((this.arcName == null) || this.arcName.length() <= 0) { + this.arcName = getARCName(rec.getMetaData()); + } + } + protected boolean checkCollectionName() { if ((this.collectionName != null) && this.collectionName.length() > 0) { return true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |