From: <bra...@us...> - 2009-07-17 23:53:31
|
Revision: 2763 http://archive-access.svn.sourceforge.net/archive-access/?rev=2763&view=rev Author: bradtofel Date: 2009-07-17 23:28:06 +0000 (Fri, 17 Jul 2009) Log Message: ----------- FEATURE(ACC-38): Now attempts to time out fetches Modified Paths: -------------- branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java Added Paths: ----------- branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java Modified: branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java =================================================================== --- branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2009-07-17 23:22:42 UTC (rev 2762) +++ branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java 2009-07-17 23:28:06 UTC (rev 2763) @@ -4,6 +4,7 @@ import java.io.IOException; import java.net.URL; +import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; import org.archive.io.arc.ARCReader; import org.archive.io.arc.ARCReaderFactory; @@ -60,21 +61,22 @@ } public static Resource getResource(URL url, long offset) - throws IOException, ResourceNotAvailableException { + throws IOException, ResourceNotAvailableException { + Resource r = null; - String name = url.getFile(); - if (isArc(name)) { - - ARCReader reader = ARCReaderFactory.get(url, offset); - r = ARCArchiveRecordToResource(reader.get(),reader); - - } else if (isWarc(name)) { - - WARCReader reader = WARCReaderFactory.get(url, offset); - r = WARCArchiveRecordToResource(reader.get(),reader); - + // TODO: allow configuration of timeouts -- now using defaults.. + TimeoutArchiveReaderFactory tarf = new TimeoutArchiveReaderFactory(); + ArchiveReader reader = tarf.getArchiveReader(url,offset); + if(reader instanceof ARCReader) { + ARCReader areader = (ARCReader) reader; + r = ARCArchiveRecordToResource(areader.get(),areader); + + } else if(reader instanceof WARCReader) { + WARCReader wreader = (WARCReader) reader; + r = WARCArchiveRecordToResource(wreader.get(),wreader); + } else { - throw new ResourceNotAvailableException("Unknown extension"); + throw new ResourceNotAvailableException("Unknown ArchiveReader"); } return r; } @@ -91,7 +93,7 @@ || name.endsWith(ArcWarcFilenameFilter.WARC_GZ_SUFFIX)); } - private static Resource ARCArchiveRecordToResource(ArchiveRecord rec, + public static Resource ARCArchiveRecordToResource(ArchiveRecord rec, ARCReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof ARCRecord)) { @@ -102,7 +104,7 @@ return ar; } - private static Resource WARCArchiveRecordToResource(ArchiveRecord rec, + public static Resource WARCArchiveRecordToResource(ArchiveRecord rec, WARCReader reader) throws ResourceNotAvailableException, IOException { if (!(rec instanceof WARCRecord)) { Added: branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java =================================================================== --- branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java (rev 0) +++ branches/wayback-1_4_2/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java 2009-07-17 23:28:06 UTC (rev 2763) @@ -0,0 +1,58 @@ +package org.archive.wayback.resourcestore.resourcefile; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; + +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveReaderFactory; + +/** + * Sad but needed subclass of the ArchiveReaderFactory, allows config of + * timeouts for connect and reads on underlying HTTP connections, and overrides + * the one getArchiveReader(URL,long) method to enable setting the timeouts. + * + * This functionality should be moved into the ArchiveReaderFactory. + * + * @author brad + * + */ +public class TimeoutArchiveReaderFactory extends ArchiveReaderFactory { + + private final static int STREAM_ALL = -1; + private int connectTimeout = 10000; + private int readTimeout = 10000; + public TimeoutArchiveReaderFactory(int connectTimeout, int readTimeout) { + this.connectTimeout = connectTimeout; + this.readTimeout = readTimeout; + } + + public TimeoutArchiveReaderFactory(int timeout) { + this.connectTimeout = timeout; + this.readTimeout = timeout; + } + public TimeoutArchiveReaderFactory() { + } + protected ArchiveReader getArchiveReader(final URL f, final long offset) + throws IOException { + + // Get URL connection. + URLConnection connection = f.openConnection(); + if (connection instanceof HttpURLConnection) { + addUserAgent((HttpURLConnection)connection); + } + if (offset != STREAM_ALL) { + // Use a Range request (Assumes HTTP 1.1 on other end). If + // length >= 0, add open-ended range header to the request. Else, + // because end-byte is inclusive, subtract 1. + connection.addRequestProperty("Range", "bytes=" + offset + "-"); + } + + connection.setConnectTimeout(connectTimeout); + connection.setReadTimeout(readTimeout); + + return getArchiveReader(f.toString(), connection.getInputStream(), + (offset == 0)); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |