From: <bra...@us...> - 2009-05-20 05:36:57
|
Revision: 2705 http://archive-access.svn.sourceforge.net/archive-access/?rev=2705&view=rev Author: bradtofel Date: 2009-05-20 00:40:05 +0000 (Wed, 20 May 2009) Log Message: ----------- MINOR-REFACTOR: moved common error checking into single method. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2009-05-05 22:17:48 UTC (rev 2704) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2009-05-20 00:40:05 UTC (rev 2705) @@ -44,6 +44,12 @@ public abstract long getRecordLength(); public abstract Map<String,String> getHttpHeaders(); + private void validate() throws IOException { + if(is == null) { + throw new IOException("No InputStream"); + } + } + protected void setInputStream(InputStream is) { if(is.markSupported()) { this.is = is; @@ -57,9 +63,7 @@ * @see java.io.BufferedInputStream#available() */ public int available() throws IOException { - if(is == null) { - throw new IOException("No InputStream"); - } + validate(); return is.available(); } /** @@ -87,9 +91,7 @@ * @see java.io.BufferedInputStream#read() */ public int read() throws IOException { - if(is == null) { - throw new IOException("No InputStream"); - } + validate(); return is.read(); } /** @@ -101,9 +103,7 @@ * @see java.io.BufferedInputStream#read(byte[], int, int) */ public int read(byte[] b, int off, int len) throws IOException { - if(is == null) { - throw new IOException("No InputStream"); - } + validate(); return is.read(b, off, len); } /** @@ -113,9 +113,7 @@ * @see java.io.FilterInputStream#read(byte[]) */ public int read(byte[] b) throws IOException { - if(is == null) { - 
throw new IOException("No InputStream"); - } + validate(); return is.read(b); } /** @@ -123,9 +121,7 @@ * @see java.io.BufferedInputStream#reset() */ public void reset() throws IOException { - if(is == null) { - throw new IOException("No InputStream"); - } + validate(); is.reset(); } /** @@ -135,9 +131,7 @@ * @see java.io.BufferedInputStream#skip(long) */ public long skip(long n) throws IOException { - if(is == null) { - throw new IOException("No InputStream"); - } + validate(); return is.skip(n); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-03-20 01:14:14
|
Revision: 2991 http://archive-access.svn.sourceforge.net/archive-access/?rev=2991&view=rev Author: bradtofel Date: 2010-03-20 01:14:08 +0000 (Sat, 20 Mar 2010) Log Message: ----------- FEATURE: actually tries to divine if a stream is chunked or not before setting the chunked input stream wrapper. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-03-20 01:11:51 UTC (rev 2990) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-03-20 01:14:08 UTC (rev 2991) @@ -66,8 +66,49 @@ */ public void setChunkedEncoding() throws IOException { validate(); - is = new ChunkedInputStream(is); + // peek ahead and make sure we have a line with hex numbers: + int max = 50; + is.mark(max+2); + int cur = 0; + boolean isChunked = false; + while(cur < max) { + int nextC = is.read(); + if(nextC == 10) { + // must have read at least 1 hex char: + if(cur > 0) { + nextC = is.read(); + if(nextC == 13) { + isChunked = true; + break; + } + } + } else { + // better be a hex character: + if(!isHex(nextC)) { + break; + } + } + cur++; + } + is.reset(); + if(isChunked) { + is = new ChunkedInputStream(is); + } } + + private boolean isHex(int c) { + if((c >= '0') && (c <= '9')) { + return true; + } + if((c >= 'a') && (c <= 'f')) { + return true; + } + if((c >= 'A') && (c <= 'F')) { + return true; + } + return false; + } + /** * @return * @throws IOException This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-14 18:44:59
|
Revision: 3034 http://archive-access.svn.sourceforge.net/archive-access/?rev=3034&view=rev Author: bradtofel Date: 2010-04-14 18:44:53 +0000 (Wed, 14 Apr 2010) Log Message: ----------- BUGFIX(unreported): chunked peek-ahead was looking for 10-13 line-ending, not 13-10. Now it allows either, as well as a lone 10. COMMENT/JAVADOC update Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 02:33:08 UTC (rev 3033) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 18:44:53 UTC (rev 3034) @@ -34,6 +34,13 @@ * Abstraction on top of a document stored in a WaybackCollection. Currently * implemented subclasses include ArcResource and WarcResource. * + * This implementation needs some pretty drastic refactoring.. May have to wait + * for 2.0. This should be a byte-oriented record, and allow wrapping the + * interior byte-stream in on the more full featured HTTP libraries + * (jetty/apache-http-client/w3c-http-reference). + * + * For now, it is a system-wide assumption that all resources are HTTP based. 
+ * * @author Brad Tofel * @version $Date$, $Revision$ */ @@ -42,8 +49,20 @@ private InputStream is; public abstract void close() throws IOException; + /** + * Assumes an HTTP resource - return the HTTP response code + * @return the HTTP response code from the HTTP message + */ public abstract int getStatusCode(); + /** + * @return the size in bytes of the record payload, including HTTP header + */ public abstract long getRecordLength(); + /** + * Assumes an HTTP response - return the HTTP headers, not including the + * HTTP Message header + * @return key-value Map of HTTP headers + */ public abstract Map<String,String> getHttpHeaders(); private void validate() throws IOException { @@ -59,10 +78,13 @@ this.is = new BufferedInputStream(is); } } + /** * indicate that there is a Transfer-Encoding: chunked header, so the input - * data should be dechunked as it is read. - * @throws IOException + * data should be dechunked as it is read. This method actually peeks + * ahead to verify that there is a hex-encoded chunk length before + * assuming the data is chunked. + * @throws IOException for usual reasons */ public void setChunkedEncoding() throws IOException { validate(); @@ -70,21 +92,31 @@ int max = 50; is.mark(max+2); int cur = 0; + int hexFound = 0; boolean isChunked = false; while(cur < max) { int nextC = is.read(); - if(nextC == 10) { + // allow CRLF and plain ole LF: + if((nextC == 13) || (nextC == 10)) { // must have read at least 1 hex char: - if(cur > 0) { + if(hexFound > 0) { + if(nextC == 10) { + isChunked = true; + break; + } nextC = is.read(); - if(nextC == 13) { + if(nextC == 10) { isChunked = true; break; } } + // keep looking to allow some blank lines. } else { // better be a hex character: - if(!isHex(nextC)) { + if(isHex(nextC)) { + hexFound++; + } else { + // not a hex digit: not a chunked stream. 
break; } } @@ -108,80 +140,45 @@ } return false; } - - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#available() - */ + public int available() throws IOException { validate(); return is.available(); } - /** - * @param readlimit - * @see java.io.BufferedInputStream#mark(int) - */ + public void mark(int readlimit) { if(is != null) { is.mark(readlimit); } } - /** - * @return - * @see java.io.BufferedInputStream#markSupported() - */ + public boolean markSupported() { if(is == null) { return false; } return is.markSupported(); } - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read() - */ + public int read() throws IOException { validate(); return is.read(); } - /** - * @param b - * @param off - * @param len - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read(byte[], int, int) - */ + public int read(byte[] b, int off, int len) throws IOException { validate(); return is.read(b, off, len); } - /** - * @param b - * @return - * @throws IOException - * @see java.io.FilterInputStream#read(byte[]) - */ + public int read(byte[] b) throws IOException { validate(); return is.read(b); } - /** - * @throws IOException - * @see java.io.BufferedInputStream#reset() - */ + public void reset() throws IOException { validate(); is.reset(); } - /** - * @param n - * @return - * @throws IOException - * @see java.io.BufferedInputStream#skip(long) - */ + public long skip(long n) throws IOException { validate(); return is.skip(n); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-04-24 00:22:46
|
Revision: 3058 http://archive-access.svn.sourceforge.net/archive-access/?rev=3058&view=rev Author: bradtofel Date: 2010-04-24 00:22:40 +0000 (Sat, 24 Apr 2010) Log Message: ----------- BUGFIX: was not ensuring the ChunkedInputStream was buffered, which is required for charset detection. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-24 00:20:49 UTC (rev 3057) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-24 00:22:40 UTC (rev 3058) @@ -124,7 +124,7 @@ } is.reset(); if(isChunked) { - is = new ChunkedInputStream(is); + setInputStream(new ChunkedInputStream(is)); } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2010-05-17 19:28:26
|
Revision: 3094 http://archive-access.svn.sourceforge.net/archive-access/?rev=3094&view=rev Author: bradtofel Date: 2010-05-17 19:28:20 +0000 (Mon, 17 May 2010) Log Message: ----------- BUGFIX: now allows spaces(' ') after the chunk hex byte length when detecting chunked contents. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:27:12 UTC (rev 3093) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-05-17 19:28:20 UTC (rev 3094) @@ -115,7 +115,8 @@ // better be a hex character: if(isHex(nextC)) { hexFound++; - } else { + } else if(nextC != ' ') { + // allow whitespace before or after chunk... // not a hex digit: not a chunked stream. break; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |