From: <bra...@us...> - 2008-10-11 02:12:46
|
Revision: 2607
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2607&view=rev
Author:   bradtofel
Date:     2008-10-11 02:12:37 +0000 (Sat, 11 Oct 2008)

Log Message:
-----------
ENHANCEMENT(ACC-38): added timeouts to HTTP requests for remote index and remote ARC/WARC documents.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java

Added Paths:
-----------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java	2008-10-11 01:59:57 UTC (rev 2606)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java	2008-10-11 02:12:37 UTC (rev 2607)
@@ -26,6 +26,8 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.net.URL;
+import java.net.URLConnection;
 import java.util.logging.Logger;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -71,7 +73,10 @@
 			.class.getName());
 
 	private String searchUrlBase;
-
+	private int connectTimeout = 10000;
+	private int readTimeout = 10000;
+
+
 	private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
 
 	private static final String WB_XML_REQUEST_TAGNAME = "request";
@@ -333,7 +338,11 @@
 	// do an HTTP request, plus parse the result into an XML DOM
 	protected Document getHttpDocument(String url) throws IOException,
 			SAXException {
-		return (getDocumentBuilder()).parse(url);
+		URL u = new URL(url);
+		URLConnection conn = u.openConnection();
+		conn.setConnectTimeout(connectTimeout);
+		conn.setReadTimeout(readTimeout);
+		return (getDocumentBuilder()).parse(conn.getInputStream(),url);
 	}
 	protected Document getFileDocument(File f) throws IOException,
 			SAXException {
@@ -365,4 +374,19 @@
 	public void setCanonicalizer(UrlCanonicalizer canonicalizer) {
 		this.canonicalizer = canonicalizer;
 	}
+	public int getConnectTimeout() {
+		return connectTimeout;
+	}
+
+	public void setConnectTimeout(int connectTimeout) {
+		this.connectTimeout = connectTimeout;
+	}
+
+	public int getReadTimeout() {
+		return readTimeout;
+	}
+
+	public void setReadTimeout(int readTimeout) {
+		this.readTimeout = readTimeout;
+	}
 }
\ No newline at end of file

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java	2008-10-11 01:59:57 UTC (rev 2606)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ResourceFactory.java	2008-10-11 02:12:37 UTC (rev 2607)
@@ -4,6 +4,7 @@
 import java.io.IOException;
 import java.net.URL;
 
+import org.archive.io.ArchiveReader;
 import org.archive.io.ArchiveRecord;
 import org.archive.io.arc.ARCReader;
 import org.archive.io.arc.ARCReaderFactory;
@@ -60,25 +61,27 @@
 	}
 
 	public static Resource getResource(URL url, long offset)
-	throws IOException, ResourceNotAvailableException {
+		throws IOException, ResourceNotAvailableException {
+
 		Resource r = null;
-		String name = url.getFile();
-		if (isArc(name)) {
-
-			ARCReader reader = ARCReaderFactory.get(url, offset);
-			r = ARCArchiveRecordToResource(reader.get(),reader);
-
-		} else if (isWarc(name)) {
-
-			WARCReader reader = WARCReaderFactory.get(url, offset);
-			r = WARCArchiveRecordToResource(reader.get(),reader);
-
+		// TODO: allow configuration of timeouts -- now using defaults..
+		TimeoutArchiveReaderFactory tarf = new TimeoutArchiveReaderFactory();
+		ArchiveReader reader = tarf.getArchiveReader(url,offset);
+		if(reader instanceof ARCReader) {
+			ARCReader areader = (ARCReader) reader;
+			r = ARCArchiveRecordToResource(areader.get(),areader);
+
+		} else if(reader instanceof WARCReader) {
+			WARCReader wreader = (WARCReader) reader;
+			r = WARCArchiveRecordToResource(wreader.get(),wreader);
+
 		} else {
-			throw new ResourceNotAvailableException("Unknown extension");
+			throw new ResourceNotAvailableException("Unknown ArchiveReader");
 		}
 		return r;
 	}
-
+
+
 	private static boolean isArc(final String name) {
 
 		return (name.endsWith(ArcWarcFilenameFilter.ARC_SUFFIX)

Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java	(rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/TimeoutArchiveReaderFactory.java	2008-10-11 02:12:37 UTC (rev 2607)
@@ -0,0 +1,58 @@
+package org.archive.wayback.resourcestore.resourcefile;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLConnection;
+
+import org.archive.io.ArchiveReader;
+import org.archive.io.ArchiveReaderFactory;
+
+/**
+ * Sad but needed subclass of the ArchiveReaderFactory, allows config of
+ * timeouts for connect and reads on underlying HTTP connections, and overrides
+ * the one getArchiveReader(URL,long) method to enable setting the timeouts.
+ *
+ * This functionality should be moved into the ArchiveReaderFactory.
+ *
+ * @author brad
+ *
+ */
+public class TimeoutArchiveReaderFactory extends ArchiveReaderFactory {
+
+	private final static int STREAM_ALL = -1;
+	private int connectTimeout = 10000;
+	private int readTimeout = 10000;
+	public TimeoutArchiveReaderFactory(int connectTimeout, int readTimeout) {
+		this.connectTimeout = connectTimeout;
+		this.readTimeout = readTimeout;
+	}
+
+	public TimeoutArchiveReaderFactory(int timeout) {
+		this.connectTimeout = timeout;
+		this.readTimeout = timeout;
+	}
+	public TimeoutArchiveReaderFactory() {
+	}
+	protected ArchiveReader getArchiveReader(final URL f, final long offset)
+	throws IOException {
+
+		// Get URL connection.
+		URLConnection connection = f.openConnection();
+		if (connection instanceof HttpURLConnection) {
+			addUserAgent((HttpURLConnection)connection);
+		}
+		if (offset != STREAM_ALL) {
+			// Use a Range request (Assumes HTTP 1.1 on other end). If
+			// length >= 0, add open-ended range header to the request. Else,
+			// because end-byte is inclusive, subtract 1.
+			connection.addRequestProperty("Range", "bytes=" + offset + "-");
+		}
+
+		connection.setConnectTimeout(connectTimeout);
+		connection.setReadTimeout(readTimeout);
+
+		return getArchiveReader(f.toString(), connection.getInputStream(),
+				(offset == 0));
+	}
+}

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|