From: <bra...@us...> - 2009-10-22 23:42:23
|
Revision: 2816
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2816&view=rev
Author:   bradtofel
Date:     2009-10-22 23:42:14 +0000 (Thu, 22 Oct 2009)

Log Message:
-----------
FEATURE: now attempts to dechunkify Chunk-Encoded streams.

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java	2009-10-22 23:41:35 UTC (rev 2815)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Resource.java	2009-10-22 23:42:14 UTC (rev 2816)
@@ -28,6 +28,8 @@
 import java.io.InputStream;
 import java.util.Map;
 
+import org.apache.commons.httpclient.ChunkedInputStream;
+
 /**
  * Abstraction on top of a document stored in a WaybackCollection. Currently
  * implemented subclasses include ArcResource and WarcResource.
@@ -58,6 +60,15 @@
         }
     }
     /**
+     * indicate that there is a Transfer-Encoding: chunked header, so the input
+     * data should be dechunked as it is read.
+     * @throws IOException
+     */
+    public void setChunkedEncoding() throws IOException {
+        validate();
+        is = new ChunkedInputStream(is);
+    }
+    /**
      * @return
      * @throws IOException
      * @see java.io.BufferedInputStream#available()

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java	2009-10-22 23:41:35 UTC (rev 2815)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/ArcResource.java	2009-10-22 23:42:14 UTC (rev 2816)
@@ -14,6 +14,7 @@
 import org.archive.io.arc.ARCReader;
 import org.archive.io.arc.ARCRecord;
 import org.archive.wayback.core.Resource;
+import org.archive.wayback.replay.HttpHeaderOperation;
 
 public class ArcResource extends Resource {
     /**
@@ -78,6 +79,13 @@
                 String value = headers[i].getValue();
                 String name = headers[i].getName();
                 metaData.put(HTTP_HEADER_PREFIX + name,value);
+                if(name.toUpperCase().contains(
+                        HttpHeaderOperation.HTTP_TRANSFER_ENC_HEADER)) {
+                    if(value.toUpperCase().contains(
+                            HttpHeaderOperation.HTTP_CHUNKED_ENCODING_HEADER)) {
+                        setChunkedEncoding();
+                    }
+                }
             }
         }
 

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java	2009-10-22 23:41:35 UTC (rev 2815)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/resourcefile/WarcResource.java	2009-10-22 23:42:14 UTC (rev 2816)
@@ -13,6 +13,7 @@
 import org.archive.io.warc.WARCReader;
 import org.archive.io.warc.WARCRecord;
 import org.archive.wayback.core.Resource;
+import org.archive.wayback.replay.HttpHeaderOperation;
 
 public class WarcResource extends Resource {
     private WARCRecord rec = null;
@@ -66,10 +67,17 @@
             Header[] tmpHeaders = HttpParser.parseHeaders(rec,
                     ARCConstants.DEFAULT_ENCODING);
             headers = new Hashtable<String,String>();
+            this.setInputStream(rec);
             for(Header header: tmpHeaders) {
                 headers.put(header.getName(), header.getValue());
+                if(header.getName().toUpperCase().contains(
+                        HttpHeaderOperation.HTTP_TRANSFER_ENC_HEADER)) {
+                    if(header.getValue().toUpperCase().contains(
+                            HttpHeaderOperation.HTTP_CHUNKED_ENCODING_HEADER)) {
+                        setChunkedEncoding();
+                    }
+                }
             }
-            this.setInputStream(rec);
             parsedHeaders = true;
         }
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |