Revision: 3571
http://archive-access.svn.sourceforge.net/archive-access/?rev=3571&view=rev
Author: bradtofel
Date: 2011-11-19 00:57:07 +0000 (Sat, 19 Nov 2011)
Log Message:
-----------
Using old ZipNumRecordWriter - new one has a bug
Modified Paths:
--------------
trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumOutputFormat.java
trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumRecordWriter.java
Modified: trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumOutputFormat.java
===================================================================
--- trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumOutputFormat.java 2011-11-19 00:50:24 UTC (rev 3570)
+++ trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumOutputFormat.java 2011-11-19 00:57:07 UTC (rev 3571)
@@ -51,7 +51,10 @@
FSDataOutputStream mainOut = mainFs.create(mainFile, false);
FSDataOutputStream summaryOut = summaryFs.create(summaryFile, false);
if(dayLimit == -1) {
- return new ZipNumRecordWriter(count, mainOut, summaryOut);
+ // This (should be) a better implementation, but appears to have a
+ // bug - summary files are empty in some cases.. Should track it down
+// return new ZipNumRecordWriter(count, mainOut, summaryOut);
+ return new ZipNumRecordWriterOld(count, mainOut, summaryOut);
} else {
return new OvercrawlZipNumRecordWriter(count,dayLimit, mainOut, summaryOut);
}
Modified: trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumRecordWriter.java
===================================================================
--- trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumRecordWriter.java 2011-11-19 00:50:24 UTC (rev 3570)
+++ trunk/archive-access/projects/ia-tools/src/main/java/org/archive/hadoop/mapreduce/ZipNumRecordWriter.java 2011-11-19 00:57:07 UTC (rev 3571)
@@ -9,6 +9,12 @@
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.archive.format.gzip.zipnum.ZipNumWriter;
+/**
+ * Warning - this has a bug.. leaves empty SUMMARY files in some cases.
+ *
+ * @author brad
+ *
+ */
public class ZipNumRecordWriter extends RecordWriter<Text, Text>{
protected ZipNumWriter znw;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.