From: <bra...@us...> - 2010-06-01 18:53:01
|
Revision: 3133 http://archive-access.svn.sourceforge.net/archive-access/?rev=3133&view=rev Author: bradtofel Date: 2010-06-01 18:52:55 +0000 (Tue, 01 Jun 2010) Log Message: ----------- BUGFIX: was not closing recorder in exception situations Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-01 18:46:00 UTC (rev 3132) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-01 18:52:55 UTC (rev 3133) @@ -139,7 +139,6 @@ LOGGER.info("URL(" + url + ") HTTP:" + code); ByteOp.discardStream(getMethod.getResponseBodyAsStream()); getMethod.releaseConnection(); - recorder.closeRecorders(); gotUrl = true; } catch (URIException e) { @@ -157,6 +156,7 @@ // } catch (IOException e) { // e.printStackTrace(); } finally { + recorder.closeRecorders(); Recorder.setHttpRecorder(null); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-06-24 20:27:24
|
Revision: 3163 http://archive-access.svn.sourceforge.net/archive-access/?rev=3163&view=rev Author: bradtofel Date: 2010-06-24 20:27:18 +0000 (Thu, 24 Jun 2010) Log Message: ----------- BUGFIX(unreported) need different Recorder filename for each Thread.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-24 20:26:25 UTC (rev 3162) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-06-24 20:27:18 UTC (rev 3163) @@ -116,8 +116,8 @@ // to track if we got a response (any response) or an exception. boolean gotUrl = false; - - Recorder recorder = new Recorder(recorderCacheDir,backingFileBase, + String fName = backingFileBase + "-" + Thread.currentThread().getId(); + Recorder recorder = new Recorder(recorderCacheDir,fName, outBufferSize, inBufferSize); ExtendedGetMethod getMethod = null; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-07-20 23:51:54
|
Revision: 3179 http://archive-access.svn.sourceforge.net/archive-access/?rev=3179&view=rev Author: bradtofel Date: 2010-07-20 23:51:47 +0000 (Tue, 20 Jul 2010) Log Message: ----------- BUGFIX: was not properly setting the User-Agent on output requests Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-07-20 23:50:57 UTC (rev 3178) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-07-20 23:51:47 UTC (rev 3179) @@ -88,7 +88,8 @@ manager.getParams().setSoTimeout(socketTimeoutMS); http.setHttpConnectionManager(manager); HttpClientParams clientParams = new HttpClientParams(); - clientParams.setParameter("http.useragent", userAgent); +// LOGGER.warn("Setting HTTP UserAgent to " + userAgent); +// clientParams.setParameter("http.useragent", userAgent); return http; } }; @@ -135,6 +136,7 @@ HttpClient client = getHttpClient(); getMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); getMethod.setFollowRedirects(false); + getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); ByteOp.discardStream(getMethod.getResponseBodyAsStream()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-09-07 22:14:48
|
Revision: 3249 http://archive-access.svn.sourceforge.net/archive-access/?rev=3249&view=rev Author: bradtofel Date: 2010-09-07 22:14:41 +0000 (Tue, 07 Sep 2010) Log Message: ----------- BUGFIX(unreported) last checkin left in "debug" code which dumped original content to STDOUT... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-09-07 22:12:17 UTC (rev 3248) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-09-07 22:14:41 UTC (rev 3249) @@ -138,8 +138,7 @@ getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); -// ByteOp.discardStream(getMethod.getResponseBodyAsStream()); - ByteOp.copyStream(getMethod.getResponseBodyAsStream(), System.out); + ByteOp.discardStream(getMethod.getResponseBodyAsStream()); getMethod.releaseConnection(); gotUrl = true; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-11-19 02:52:03
|
Revision: 3343 http://archive-access.svn.sourceforge.net/archive-access/?rev=3343&view=rev Author: bradtofel Date: 2010-11-19 02:51:57 +0000 (Fri, 19 Nov 2010) Log Message: ----------- TWEAK: closing GetMethod's inputstream after using, moved, GetMethod.releaseConnections() to finally{} block. All attempts to make sure we don't leave filehandles open Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-11-19 02:47:32 UTC (rev 3342) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2010-11-19 02:51:57 UTC (rev 3343) @@ -132,8 +132,9 @@ getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); - ByteOp.discardStream(getMethod.getResponseBodyAsStream()); - getMethod.releaseConnection(); + InputStream responseIS = getMethod.getResponseBodyAsStream(); + ByteOp.discardStream(responseIS); + responseIS.close(); gotUrl = true; } catch (URIException e) { @@ -156,6 +157,9 @@ } finally { recorder.closeRecorders(); Recorder.setHttpRecorder(null); + if(getMethod != null) { + getMethod.releaseConnection(); + } } // now write the content, or a fake record: This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-02-06 14:51:31
|
Revision: 3406 http://archive-access.svn.sourceforge.net/archive-access/?rev=3406&view=rev Author: bradtofel Date: 2011-02-06 14:51:25 +0000 (Sun, 06 Feb 2011) Log Message: ----------- BUGFIX: no longer catching and eating ConnectTimeoutExceptions Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-02-06 14:49:24 UTC (rev 3405) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-02-06 14:51:25 UTC (rev 3406) @@ -38,8 +38,10 @@ import org.apache.commons.httpclient.SimpleHttpConnectionManager; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.cookie.CookiePolicy; +import org.apache.commons.io.IOUtils; import org.archive.httpclient.HttpRecorderGetMethod; import org.archive.io.RecordingInputStream; +import org.archive.io.ReplayInputStream; import org.archive.io.arc.ARCWriter; import org.archive.net.LaxURI; import org.archive.util.Recorder; @@ -140,10 +142,10 @@ } catch (URIException e) { e.printStackTrace(); } catch (UnknownHostException e) { - LOGGER.warning("Unknown host for " + url); - } catch (ConnectTimeoutException e) { - // TODO: should we act like it's a full block? - LOGGER.warning("Timeout out connecting to " + url); + LOGGER.warning("Unknown host for " + url); +// } catch (ConnectTimeoutException e) { +// // TODO: should we act like it's a full block? +// LOGGER.warning("Timeout out connecting to " + url); } catch (ConnectException e) { LOGGER.warning("ConnectionRefused to " + url); @@ -164,21 +166,24 @@ // now write the content, or a fake record: ARCWriter writer = null; + ReplayInputStream replayIS = null; try { writer = cache.getWriter(); if(gotUrl) { RecordingInputStream ris = recorder.getRecordedInput(); + replayIS = ris.getReplayInputStream(); region = storeInputStreamARCRecord(writer, url, getMethod.getMime(), getMethod.getRemoteIP(), getMethod.getCaptureDate(), - ris.getReplayInputStream(), (int) ris.getSize()); + replayIS, (int) ris.getSize()); } else { region = storeNotAvailable(writer, url); } } finally { + IOUtils.closeQuietly(replayIS); if(writer != null) { cache.returnWriter(writer); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-03-09 05:45:02
|
Revision: 3423 http://archive-access.svn.sourceforge.net/archive-access/?rev=3423&view=rev Author: bradtofel Date: 2011-03-09 05:44:56 +0000 (Wed, 09 Mar 2011) Log Message: ----------- BUGFIXES(unreported): Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-03-09 05:41:43 UTC (rev 3422) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-03-09 05:44:56 UTC (rev 3423) @@ -24,6 +24,8 @@ import java.io.IOException; import java.io.InputStream; import java.net.ConnectException; +import java.net.NoRouteToHostException; +import java.net.SocketException; import java.net.UnknownHostException; import java.util.Date; import java.util.logging.Logger; @@ -34,10 +36,13 @@ import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpConnection; import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.HttpMethodRetryHandler; import org.apache.commons.httpclient.HttpState; import org.apache.commons.httpclient.SimpleHttpConnectionManager; import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.cookie.CookiePolicy; +import org.apache.commons.httpclient.params.HttpClientParams; import org.apache.commons.io.IOUtils; import org.archive.httpclient.HttpRecorderGetMethod; import org.archive.io.RecordingInputStream; @@ -76,16 +81,18 @@ private int inBufferSize = 1024 * 100; // private int outBufferSize = 10; // private int inBufferSize = 100; + private final static HttpMethodRetryHandler noRetryHandler = + new NoRetryHandler(); private final ThreadLocal<HttpClient> tl = new ThreadLocal<HttpClient>() { protected synchronized HttpClient initialValue() { - HttpClient http = new HttpClient(); + HttpClientParams params = new HttpClientParams(); + params.setParameter(HttpClientParams.RETRY_HANDLER, noRetryHandler); IPHttpConnectionManager manager = new IPHttpConnectionManager(); manager.getParams().setConnectionTimeout(connectionTimeoutMS); manager.getParams().setSoTimeout(socketTimeoutMS); - http.setHttpConnectionManager(manager); - return http; + return new HttpClient(params, manager); } }; @@ -134,9 +141,11 @@ getMethod.setRequestHeader("User-Agent", userAgent); int code = client.executeMethod(getMethod); LOGGER.info("URL(" + url + ") HTTP:" + code); - InputStream responseIS = getMethod.getResponseBodyAsStream(); - ByteOp.discardStream(responseIS); - responseIS.close(); + InputStream responseIS = getMethod.getResponseBodyAsStream(); + if(responseIS != null) { + ByteOp.discardStream(responseIS); + responseIS.close(); + } gotUrl = true; } catch (URIException e) { @@ -148,7 +157,11 @@ // LOGGER.warning("Timeout out connecting to " + url); } catch (ConnectException e) { LOGGER.warning("ConnectionRefused to " + url); - + } catch (NoRouteToHostException e) { + LOGGER.warning("NoRouteToHost for " + url); + } catch (SocketException e) { + // should only be things like "Connection Reset", etc.. + LOGGER.warning("SocketException for " + url); } catch (HttpException e) { e.printStackTrace(); // we have to let IOExceptions out, problems caused by local disk This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2011-10-25 00:54:41
|
Revision: 3543 http://archive-access.svn.sourceforge.net/archive-access/?rev=3543&view=rev Author: bradtofel Date: 2011-10-25 00:54:34 +0000 (Tue, 25 Oct 2011) Log Message: ----------- FEATURE: Now uses special ProtocolSocketFactory to record DNS lookup times FEATURE: Now returns 504 Gateway Timeout for timeout situations Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-10-25 00:53:34 UTC (rev 3542) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/URLtoARCCacher.java 2011-10-25 00:54:34 UTC (rev 3543) @@ -26,6 +26,7 @@ import java.net.ConnectException; import java.net.NoRouteToHostException; import java.net.SocketException; +import java.net.SocketTimeoutException; import java.net.UnknownHostException; import java.util.Date; import java.util.logging.Logger; @@ -43,6 +44,7 @@ import org.apache.commons.httpclient.URIException; import org.apache.commons.httpclient.cookie.CookiePolicy; import org.apache.commons.httpclient.params.HttpClientParams; +import org.apache.commons.httpclient.protocol.Protocol; import org.apache.commons.io.IOUtils; import org.archive.httpclient.HttpRecorderGetMethod; import org.archive.io.RecordingInputStream; @@ -90,6 +92,9 @@ HttpClientParams params = new HttpClientParams(); params.setParameter(HttpClientParams.RETRY_HANDLER, noRetryHandler); IPHttpConnectionManager manager = new IPHttpConnectionManager(); + Protocol dnsTimedProtocol = new Protocol("http", + new DNSTimingProtocolSocketFactory(), 80); + Protocol.registerProtocol("http", dnsTimedProtocol); manager.getParams().setConnectionTimeout(connectionTimeoutMS); manager.getParams().setSoTimeout(socketTimeoutMS); return new HttpClient(params, manager); @@ -102,9 +107,13 @@ private static byte[] ERROR_BYTES = "HTTP 502 Bad Gateway\n\n".getBytes(); - private static String ERROR_MIME = "unk"; + private static String ERROR_MIME = "unk"; private static String ERROR_IP = "0.0.0.0"; + private static byte[] TIMEOUT_BYTES = "HTTP 504 Gateway Timeout\n\n".getBytes(); + private static String TIMEOUT_MIME = "unk"; + private static String TIMEOUT_IP = "0.0.0.0"; + /** * @param url to cache * @param cache ARCCacheDirectory for storing result or faked result @@ -119,6 +128,7 @@ // to track if we got a response (any response) or an exception. boolean gotUrl = false; + boolean isTimeout = false; String fName = backingFileBase + "-" + Thread.currentThread().getId(); Recorder recorder = new Recorder(recorderCacheDir,fName, outBufferSize, inBufferSize); @@ -152,16 +162,21 @@ e.printStackTrace(); } catch (UnknownHostException e) { LOGGER.warning("Unknown host for " + url); -// } catch (ConnectTimeoutException e) { -// // TODO: should we act like it's a full block? -// LOGGER.warning("Timeout out connecting to " + url); + + } catch (ConnectTimeoutException e) { + // TODO: should we act like it's a full block? + LOGGER.warning("Timeout out connecting to " + url); + isTimeout = true; + } catch(SocketTimeoutException e) { + LOGGER.warning("Timeout out socket for " + url); + isTimeout = true; } catch (ConnectException e) { LOGGER.warning("ConnectionRefused to " + url); } catch (NoRouteToHostException e) { LOGGER.warning("NoRouteToHost for " + url); } catch (SocketException e) { // should only be things like "Connection Reset", etc.. - LOGGER.warning("SocketException for " + url); + LOGGER.warning("SocketException for " + url); } catch (HttpException e) { e.printStackTrace(); // we have to let IOExceptions out, problems caused by local disk @@ -190,7 +205,8 @@ getMethod.getMime(), getMethod.getRemoteIP(), getMethod.getCaptureDate(), replayIS, (int) ris.getSize()); - + } else if(isTimeout) { + region = storeTimeout(writer,url); } else { region = storeNotAvailable(writer, url); } @@ -240,6 +256,16 @@ return fr; } + private FileRegion storeTimeout(ARCWriter writer, String url) + throws IOException { + + ByteArrayInputStream bais = new ByteArrayInputStream(TIMEOUT_BYTES); + FileRegion fr = storeInputStreamARCRecord(writer, url, + TIMEOUT_MIME, TIMEOUT_IP, new Date(), bais, TIMEOUT_BYTES.length); + fr.isFake = true; + return fr; + } + /* * Get method which ferrets away the Content-Type header, the remote IP * and remembers when the HTTP Message header was received. This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |