From: <bra...@us...> - 2010-05-29 00:48:42
|
Revision: 3131 http://archive-access.svn.sourceforge.net/archive-access/?rev=3131&view=rev Author: bradtofel Date: 2010-05-29 00:48:36 +0000 (Sat, 29 May 2010) Log Message: ----------- INTERFACE: reduced specificity from Resource to InputStream, added a method to get the full ParseContext metadata Map, added new Interface representing all available parse events. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java 2010-05-28 23:18:42 UTC (rev 3130) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/charset/CharsetDetector.java 2010-05-29 00:48:36 UTC (rev 3131) @@ -25,6 +25,7 @@ package org.archive.wayback.replay.charset; import java.io.IOException; +import java.io.InputStream; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.util.Iterator; @@ -117,7 +118,7 @@ * @return String character set found from META tags in the HTML * @throws IOException */ - protected String getCharsetFromMeta(Resource resource) throws IOException { + protected String getCharsetFromMeta(InputStream resource) throws IOException { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; @@ -143,7 +144,7 @@ * @return String character encoding found, or null if nothing looked good. 
* @throws IOException */ - protected String getCharsetFromBytes(Resource resource) throws IOException { + protected String getCharsetFromBytes(InputStream resource) throws IOException { String charsetName = null; byte[] bbuffer = new byte[MAX_CHARSET_READAHEAD]; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-28 23:18:42 UTC (rev 3130) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-05-29 00:48:36 UTC (rev 3131) @@ -27,6 +27,7 @@ import java.net.URISyntaxException; import java.net.URL; import java.util.HashMap; +import java.util.Map; import org.apache.commons.httpclient.URIException; import org.archive.net.UURI; @@ -77,6 +78,9 @@ public String getData(String key) { return data.get(key); } + public Map<String,String> getMap() { + return data; + } /** * @param url against which relative URLs should be resolved for this parse */ @@ -102,7 +106,8 @@ url = url.substring(0,hashIdx); } try { - return UURIFactory.getInstance(baseUrl, url).toString() + frag; + return baseUrl.resolve(url,false).toString() + frag; +// return UURIFactory.getInstance(baseUrl, url).toString() + frag; } catch (URIException e) { e.printStackTrace(); } Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java 2010-05-29 00:48:36 UTC (rev 
3131) @@ -0,0 +1,7 @@ +package org.archive.wayback.util.htmllex.handlers; + +public interface AllEventsHandler extends CloseTagHandler, ContentTextHandler, +CSSTextHandler, JSTextHandler, OpenTagHandler, ParseCompleteHandler, +RemarkTextHandler { + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/handlers/AllEventsHandler.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |