From: <bra...@us...> - 2011-05-17 17:41:56
|
Revision: 3438 http://archive-access.svn.sourceforge.net/archive-access/?rev=3438&view=rev Author: bradtofel Date: 2011-05-17 17:41:50 +0000 (Tue, 17 May 2011) Log Message: ----------- BUGFIX(unreported): No longer relies on Heritrix ArchiveReader to perform seek() operation, as this behavior seems to have been broken. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java 2011-04-19 22:41:25 UTC (rev 3437) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java 2011-05-17 17:41:50 UTC (rev 3438) @@ -28,8 +28,8 @@ import java.util.Set; import org.apache.commons.httpclient.Header; +import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; -import org.archive.io.arc.ARCReader; import org.archive.io.arc.ARCRecord; import org.archive.wayback.core.Resource; import org.archive.wayback.replay.HttpHeaderOperation; @@ -54,7 +54,7 @@ * object for ARCReader -- need to hold on to this in order to call close() * to release filehandle after completing access to this record. optional */ - ARCReader arcReader = null; + ArchiveReader arcReader = null; /** * flag to indicate if the ARCRecord skipHTTPHeader() has been called */ @@ -71,7 +71,7 @@ * @param rec * @param reader */ - public ArcResource(final ARCRecord rec,final ARCReader reader) { + public ArcResource(final ARCRecord rec,final ArchiveReader reader) { super(); arcRecord = rec; arcReader = reader; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2011-04-19 22:41:25 UTC (rev 3437) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2011-05-17 17:41:50 UTC (rev 3438) @@ -20,7 +20,10 @@ package org.archive.wayback.resourcestore.resourcefile; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; import java.net.URL; import org.archive.io.ArchiveReader; @@ -62,15 +65,18 @@ name = name.substring(0, name.length() - ArcWarcFilenameFilter.OPEN_SUFFIX.length()); } + RandomAccessFile raf = new RandomAccessFile(file, "r"); + raf.seek(offset); + InputStream is = new FileInputStream(raf.getFD()); + String fPath = file.getAbsolutePath(); if (isArc(name)) { + ArchiveReader reader = ARCReaderFactory.get(fPath, is, false); + r = ARCArchiveRecordToResource(reader.get(), reader); - ARCReader reader = ARCReaderFactory.get(file,offset); - r = ARCArchiveRecordToResource(reader.get(),reader); - } else if (isWarc(name)) { - WARCReader reader = WARCReaderFactory.get(file,offset); - r = WARCArchiveRecordToResource(reader.get(),reader); + ArchiveReader reader = WARCReaderFactory.get(fPath, is, false); + r = WARCArchiveRecordToResource(reader.get(), reader); } else { throw new ResourceNotAvailableException("Unknown extension"); @@ -78,7 +84,6 @@ return r; } - public static Resource getResource(URL url, long offset) throws IOException, ResourceNotAvailableException { @@ -114,7 +119,7 @@ } public static Resource ARCArchiveRecordToResource(ArchiveRecord rec, - ARCReader reader) throws ResourceNotAvailableException, IOException { + ArchiveReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof ARCRecord)) { throw new ResourceNotAvailableException("Bad ARCRecord format"); @@ -125,7 +130,7 @@ } public static Resource WARCArchiveRecordToResource(ArchiveRecord rec, - WARCReader reader) throws ResourceNotAvailableException, IOException { + ArchiveReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof WARCRecord)) { throw new ResourceNotAvailableException("Bad WARCRecord format"); Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java 2011-04-19 22:41:25 UTC (rev 3437) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java 2011-05-17 17:41:50 UTC (rev 3438) @@ -27,21 +27,21 @@ import org.apache.commons.httpclient.HttpParser; import org.apache.commons.httpclient.StatusLine; import org.apache.commons.httpclient.util.EncodingUtil; +import org.archive.io.ArchiveReader; import org.archive.io.RecoverableIOException; import org.archive.io.arc.ARCConstants; -import org.archive.io.warc.WARCReader; import org.archive.io.warc.WARCRecord; import org.archive.wayback.core.Resource; import org.archive.wayback.replay.HttpHeaderOperation; public class WarcResource extends Resource { private WARCRecord rec = null; - private WARCReader reader = null; + private ArchiveReader reader = null; private Map<String, String> headers = null; private long length = 0; private int status = 0; private boolean parsedHeaders = false; - public WarcResource(WARCRecord rec, WARCReader reader) { + public WarcResource(WARCRecord rec, ArchiveReader reader) { this.rec = rec; this.reader = reader; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |