Update of /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27590/lexer
Modified Files:
Page.java
Log Message:
Fix Bug #1461473 Relative links starting with ?
Added overloaded methods taking boolean 'strict' flag on URL manipulators.
Default is loose interpretation like most browsers.
Index: Page.java
===================================================================
RCS file: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Page.java,v
retrieving revision 1.53
retrieving revision 1.54
diff -C2 -d -r1.53 -r1.54
*** Page.java 19 Mar 2006 17:09:09 -0000 1.53
--- Page.java 7 Apr 2006 00:58:19 -0000 1.54
***************
*** 828,841 ****
/**
! * Build a URL from the link and base provided.
! * @return An absolute URL.
* @param link The (relative) URI.
* @param base The base URL of the page, either from the <BASE> tag
* or, if none, the URL the page is being fetched from.
* @exception MalformedURLException If creating the URL fails.
*/
public URL constructUrl (String link, String base)
throws MalformedURLException
{
String path;
boolean modified;
--- 828,860 ----
/**
! * Build a URL from the link and base provided using non-strict rules.
* @param link The (relative) URI.
* @param base The base URL of the page, either from the <BASE> tag
* or, if none, the URL the page is being fetched from.
+ * @return An absolute URL.
* @exception MalformedURLException If creating the URL fails.
+ * @see #constructUrl(String, String, boolean)
*/
public URL constructUrl (String link, String base)
throws MalformedURLException
{
+ return (constructUrl (link, base, false));
+ }
+
+ /**
+ * Build a URL from the link and base provided.
+ * @param link The (relative) URI.
+ * @param base The base URL of the page, either from the <BASE> tag
+ * or, if none, the URL the page is being fetched from.
+ * @param strict If <code>true</code> a link starting with '?' is handled
+ * according to <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
+ * otherwise the common interpretation of a query appended to the base
+ * is used instead.
+ * @return An absolute URL.
+ * @exception MalformedURLException If creating the URL fails.
+ */
+ public URL constructUrl (String link, String base, boolean strict)
+ throws MalformedURLException
+ {
String path;
boolean modified;
***************
*** 844,848 ****
URL url; // constructed URL combining relative link and base
! url = new URL (new URL (base), link);
path = url.getFile ();
modified = false;
--- 863,875 ----
URL url; // constructed URL combining relative link and base
! // Bug #1461473 Relative links starting with ?
! if (!strict && ('?' == link.charAt (0)))
! { // remove query part of base if any
! if (-1 != (index = base.lastIndexOf ('?')))
! base = base.substring (0, index);
! url = new URL (base + link);
! }
! else
! url = new URL (new URL (base), link);
path = url.getFile ();
modified = false;
***************
*** 887,890 ****
--- 914,932 ----
public String getAbsoluteURL (String link)
{
+ return (getAbsoluteURL (link, false));
+ }
+
+ /**
+ * Create an absolute URL from a relative link.
+ * @param link The relative portion of a URL.
+ * @param strict If <code>true</code> a link starting with '?' is handled
+ * according to <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
+ * otherwise the common interpretation of a query appended to the base
+ * is used instead.
+ * @return The fully qualified URL or the original link if it was absolute
+ * already or a failure occurred.
+ */
+ public String getAbsoluteURL (String link, boolean strict)
+ {
String base;
URL url;
***************
*** 903,907 ****
else
{
! url = constructUrl (link, base);
ret = url.toExternalForm ();
}
--- 945,949 ----
else
{
! url = constructUrl (link, base, strict);
ret = url.toExternalForm ();
}
|