From: <bra...@us...> - 2010-04-14 18:44:59
|
Revision: 3034 http://archive-access.svn.sourceforge.net/archive-access/?rev=3034&view=rev Author: bradtofel Date: 2010-04-14 18:44:53 +0000 (Wed, 14 Apr 2010) Log Message: ----------- BUGFIX(unreported): chunked peek-ahead was looking for 10-13 line-ending, not 13-10. Now it allows either, as well as a lone 10. COMMENT/JAVADOC update Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 02:33:08 UTC (rev 3033) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java 2010-04-14 18:44:53 UTC (rev 3034) @@ -34,6 +34,13 @@ * Abstraction on top of a document stored in a WaybackCollection. Currently * implemented subclasses include ArcResource and WarcResource. * + * This implementation needs some pretty drastic refactoring. May have to wait + * for 2.0. This should be a byte-oriented record, and allow wrapping the + * interior byte-stream in one of the more full-featured HTTP libraries + * (jetty/apache-http-client/w3c-http-reference). + * + * For now, it is a system-wide assumption that all resources are HTTP based. 
+ * * @author Brad Tofel * @version $Date$, $Revision$ */ @@ -42,8 +49,20 @@ private InputStream is; public abstract void close() throws IOException; + /** + * Assumes an HTTP resource - return the HTTP response code + * @return the HTTP response code from the HTTP message + */ public abstract int getStatusCode(); + /** + * @return the size in bytes of the record payload, including HTTP header + */ public abstract long getRecordLength(); + /** + * Assumes an HTTP response - return the HTTP headers, not including the + * HTTP Message header + * @return key-value Map of HTTP headers + */ public abstract Map<String,String> getHttpHeaders(); private void validate() throws IOException { @@ -59,10 +78,13 @@ this.is = new BufferedInputStream(is); } } + /** * indicate that there is a Transfer-Encoding: chunked header, so the input - * data should be dechunked as it is read. - * @throws IOException + * data should be dechunked as it is read. This method actually peeks + * ahead to verify that there is a hex-encoded chunk length before + * assuming the data is chunked. + * @throws IOException for usual reasons */ public void setChunkedEncoding() throws IOException { validate(); @@ -70,21 +92,31 @@ int max = 50; is.mark(max+2); int cur = 0; + int hexFound = 0; boolean isChunked = false; while(cur < max) { int nextC = is.read(); - if(nextC == 10) { + // allow CRLF and plain ole LF: + if((nextC == 13) || (nextC == 10)) { // must have read at least 1 hex char: - if(cur > 0) { + if(hexFound > 0) { + if(nextC == 10) { + isChunked = true; + break; + } nextC = is.read(); - if(nextC == 13) { + if(nextC == 10) { isChunked = true; break; } } + // keep looking to allow some blank lines. } else { // better be a hex character: - if(!isHex(nextC)) { + if(isHex(nextC)) { + hexFound++; + } else { + // not a hex digit: not a chunked stream. 
break; } } @@ -108,80 +140,45 @@ } return false; } - - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#available() - */ + public int available() throws IOException { validate(); return is.available(); } - /** - * @param readlimit - * @see java.io.BufferedInputStream#mark(int) - */ + public void mark(int readlimit) { if(is != null) { is.mark(readlimit); } } - /** - * @return - * @see java.io.BufferedInputStream#markSupported() - */ + public boolean markSupported() { if(is == null) { return false; } return is.markSupported(); } - /** - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read() - */ + public int read() throws IOException { validate(); return is.read(); } - /** - * @param b - * @param off - * @param len - * @return - * @throws IOException - * @see java.io.BufferedInputStream#read(byte[], int, int) - */ + public int read(byte[] b, int off, int len) throws IOException { validate(); return is.read(b, off, len); } - /** - * @param b - * @return - * @throws IOException - * @see java.io.FilterInputStream#read(byte[]) - */ + public int read(byte[] b) throws IOException { validate(); return is.read(b); } - /** - * @throws IOException - * @see java.io.BufferedInputStream#reset() - */ + public void reset() throws IOException { validate(); is.reset(); } - /** - * @param n - * @return - * @throws IOException - * @see java.io.BufferedInputStream#skip(long) - */ + public long skip(long n) throws IOException { validate(); return is.skip(n); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |