From: <bra...@us...> - 2010-10-11 18:56:45
|
Revision: 3271
          http://archive-access.svn.sourceforge.net/archive-access/?rev=3271&view=rev
Author:   bradtofel
Date:     2010-10-11 18:56:38 +0000 (Mon, 11 Oct 2010)

Log Message:
-----------
BUGFIX: numerous url escaping/resolving issues, by switching back to UURIFactory.getInstance()

Modified Paths:
--------------
    trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java

Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-10-01 23:23:52 UTC (rev 3270)
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2010-10-11 18:56:38 UTC (rev 3271)
@@ -28,7 +28,6 @@
 import org.apache.commons.lang.StringEscapeUtils;
 import org.archive.net.UURI;
 import org.archive.net.UURIFactory;
-import org.htmlparser.util.Translate;
 
 /**
  * Class which tracks the context and state involved with parsing an HTML
@@ -74,6 +73,10 @@
     public String getData(String key) {
         return data.get(key);
     }
+
+    /**
+     * @return the full Map of String to String for this parsing context.
+     */
     public Map<String,String> getMap() {
         return data;
     }
@@ -104,20 +107,21 @@
             frag = url.substring(hashIdx);
             url = url.substring(0,hashIdx);
         }
+
+        if(baseUrl == null) {
+            // TODO: log ?
+            return url + frag;
+        }
+
         try {
-            if(baseUrl == null) {
-                // TODO: log
-                System.err.println("No url to resolve!");
-                return url;
-            }
-            return baseUrl.resolve(url,true).toString() + frag;
-//            return baseUrl.resolve(url,false).toString() + frag;
-//            return UURIFactory.getInstance(baseUrl, url).toString() + frag;
+
+            return UURIFactory.getInstance(baseUrl, url).toString() + frag;
         } catch (URIException e) {
             e.printStackTrace();
         }
         return url;
-    }
+    }
+
     /**
      * @param url which should be resolved.
      * @return absolute form of input url, or url itself if javascript:

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|