From: <bra...@us...> - 2009-11-12 22:22:41
|
Revision: 2930 http://archive-access.svn.sourceforge.net/archive-access/?rev=2930&view=rev Author: bradtofel Date: 2009-11-12 22:22:32 +0000 (Thu, 12 Nov 2009) Log Message: ----------- BUGFIX: now includes original named link (#name fragment at end of input URL) in resolved url Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/htmllex/ParseContextTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2009-11-11 00:25:13 UTC (rev 2929) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/htmllex/ParseContext.java 2009-11-12 22:22:32 UTC (rev 2930) @@ -24,7 +24,7 @@ */ package org.archive.wayback.util.htmllex; -import java.net.MalformedURLException; +import java.net.URISyntaxException; import java.net.URL; import java.util.HashMap; @@ -54,15 +54,31 @@ private boolean inScriptText = false; private HashMap<String,String> data = null; + /** + * constructor + */ public ParseContext() { data = new HashMap<String, String>(); } + /** + * Stores arbitrary key value pairs in this ParseContext + * @param key for storage + * @param value for storage + */ public void putData(String key, String value) { data.put(key, value); } + /** + * Retrieves previously stored data for key key from this ParseContext + * @param key under which value was stored + * @return previously stored value for key or null, if nothing was stored + */ public String getData(String key) { return data.get(key); } + /** + * @param url against which relative URLs should be resolved for this parse + */ public void setBaseUrl(URL url) { try { baseUrl = UURIFactory.getInstance(url.toExternalForm()); @@ -70,23 +86,37 @@ e.printStackTrace(); } } - public String resolve(String url) throws MalformedURLException { + /** + * @param url which should be resolved against the baseUrl for this + * ParseContext. + * @return absolute form of url, resolved against baseUrl if relative. + * @throws URISyntaxException if the input URL is malformed + */ + public String resolve(String url) throws URISyntaxException { + int hashIdx = url.indexOf('#'); + String frag = ""; + if(hashIdx != -1) { + frag = url.substring(hashIdx); + url = url.substring(0,hashIdx); + } try { - return baseUrl.resolve(url).toString(); + return baseUrl.resolve(url).toString() + frag; } catch (URIException e) { e.printStackTrace(); } return url; -// URL tmp = new URL(baseUrl,url); -// return tmp.toString(); } + /** + * @param url which should be resolved. + * @return absolute form of input url, or url itself if javascript: + */ public String contextualizeUrl(String url) { if(url.startsWith("javascript:")) { return url; } try { return resolve(url); - } catch (MalformedURLException e) { + } catch (URISyntaxException e) { e.printStackTrace(); return url; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/htmllex/ParseContextTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/htmllex/ParseContextTest.java 2009-11-11 00:25:13 UTC (rev 2929) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/htmllex/ParseContextTest.java 2009-11-12 22:22:32 UTC (rev 2930) @@ -25,13 +25,9 @@ package org.archive.wayback.util.htmllex; -import java.net.MalformedURLException; import java.net.URI; import java.net.URL; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; - import junit.framework.TestCase; /** @@ -46,6 +42,14 @@ public void testContextualizeUrl() { ParseContext pc = new ParseContext(); try { + + URI tmp = new URI("http://base.com/foo.html#REF"); + String ref = tmp.getFragment(); + assertEquals("REF",ref); + tmp = new URI("http://base.com/foo.html"); + assertNull(tmp.getFragment()); + + pc.setBaseUrl(new URL("http://base.com/")); assertEquals("http://base.com/images.gif", pc.contextualizeUrl("/images.gif")); @@ -57,6 +61,12 @@ pc.contextualizeUrl("/image/1s.gif")); assertEquals("http://base.com/image/1s.gif", pc.contextualizeUrl("../../image/1s.gif")); + assertEquals("http://base.com/image/1s.gif", + pc.contextualizeUrl("/../../image/1s.gif")); + assertEquals("http://base.com/image/1.html#REF", + pc.contextualizeUrl("/../../image/1.html#REF")); + assertEquals("http://base.com/image/1.html#REF FOO", + pc.contextualizeUrl("/../../image/1.html#REF FOO")); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |