[HtmlUnit] SF.net SVN: htmlunit:[3226] trunk/htmlunit

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Revision: 3226
          http://htmlunit.svn.sourceforge.net/htmlunit/?rev=3226&view=rev
Author:   sudhan_moghe
Date:     2008-07-30 09:48:11 +0000 (Wed, 30 Jul 2008)

Log Message:
-----------
Fixed bug 2027723: WebClient.expandUrl doesn't handle references correctly. Replaced the existing implementation with one that implements the algorithm specified in RFC1808. Also moved the URL processing from WebClient to UrlUtils. Thanks to Martin Tamme for the patch.

Modified Paths:
--------------
    trunk/htmlunit/checkstyle_suppressions.xml
    trunk/htmlunit/src/changes/changes.xml
    trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java
    trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/StringUtils.java
    trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java
    trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java

Property Changed:
----------------
    trunk/htmlunit/


Property changes on: trunk/htmlunit
___________________________________________________________________
Modified: svn:ignore
   - maven.log
velocity.log
log
target
build.properties
.classpath
.project
.checkstyle
temp
build_eclipse
.checkclipse
checkstyle.properties
.settings
build
dist
ant-jars
ant-target
artifacts

   + maven.log
velocity.log
log
target
build.properties
.classpath
.project
.checkstyle
temp
build_eclipse
.checkclipse
checkstyle.properties
.settings
build
dist
ant-jars
ant-target
artifacts
target-eclipse


Modified: trunk/htmlunit/checkstyle_suppressions.xml
===================================================================

--- trunk/htmlunit/checkstyle_suppressions.xml	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/checkstyle_suppressions.xml	2008-07-30 09:48:11 UTC (rev 3226)
@@ -5,7 +5,6 @@
     "http://www.puppycrawl.com/dtds/suppressions_1_0.dtd">
 
 <suppressions>
-    <suppress checks="FileLength" files="WebClient.java"/>
     <suppress checks="FileLength" files="CSSStyleDeclaration.java"/>
     <suppress checks="FileLength" files="HtmlPage.java"/>
     <suppress checks="FileLength" files="DocumentTest.java"/>

Modified: trunk/htmlunit/src/changes/changes.xml
===================================================================
--- trunk/htmlunit/src/changes/changes.xml	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/src/changes/changes.xml	2008-07-30 09:48:11 UTC (rev 3226)
@@ -8,6 +8,9 @@
 
     <body>
         <release version="2.3-SNAPSHOT" description="Bugfixes.">
+            <action type="fix" dev="sudhan_moghe" id="2027723" due-to="Martin Tamme">
+                WebClient.expandUrl doesn't handle references correctly.
+            </action>
             <action type="fix" dev="sdanig" id="2024741">
                 Attribute values for cloned nodes were not available via JavaScript.
             </action>

Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java
===================================================================
--- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/WebClient.java	2008-07-30 09:48:11 UTC (rev 3226)
@@ -37,7 +37,6 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
-import java.util.StringTokenizer;
 
 import org.apache.commons.codec.DecoderException;
 import org.apache.commons.httpclient.HttpStatus;
@@ -103,6 +102,7 @@
  * @author Ahmed Ashour
  * @author Bruce Chapman
  * @author Sudhan Moghe
+ * @author Martin Tamme
  */
 public class WebClient implements Serializable {
 
@@ -1224,125 +1224,10 @@
      * @return the expansion of the specified base and relative URLs
      * @throws MalformedURLException if an error occurred when creating a URL object
      */
-    public static URL expandUrl(final URL baseUrl, final String relativeUrl)
-        throws MalformedURLException {
+    public static URL expandUrl(final URL baseUrl, final String relativeUrl) throws MalformedURLException {
+        final String newUrl = UrlUtils.resolveUrl(baseUrl, relativeUrl);
 
-        if (StringUtils.isEmpty(relativeUrl)) {
-            return baseUrl;
-        }
-
-        String parseUrl = relativeUrl.trim();
-
-        // section 2.4.2 - parsing scheme
-        final int schemeIndex = parseUrl.indexOf(":");
-        if (schemeIndex != -1) {
-            boolean isProtocolSpecified = true;
-            for (int i = 0; i < schemeIndex; i++) {
-                if (Character.isLetter(parseUrl.charAt(i)) == false) {
-                    isProtocolSpecified = false;
-                    break;
-                }
-            }
-            if (isProtocolSpecified) {
-                return makeUrl(parseUrl);
-            }
-        }
-
-        // section 2.4.3 - parsing network location/login
-        if (parseUrl.startsWith("//")) {
-            return makeUrl(baseUrl.getProtocol() + ":" + parseUrl);
-        }
-
-        // section 2.4.1 - parsing fragment
-        final int fragmentIndex = parseUrl.lastIndexOf("#");
-        String reference = null;
-        if (fragmentIndex != -1) {
-            reference = StringUtils.substringAfterLast(parseUrl, "#");
-            parseUrl = parseUrl.substring(0, fragmentIndex);
-        }
-
-        // section 2.4.4 - parsing query
-        String stringQuery = null;
-        final int queryIndex = parseUrl.lastIndexOf("?");
-        if (queryIndex != -1) {
-            stringQuery = parseUrl.substring(queryIndex);
-            parseUrl = parseUrl.substring(0, queryIndex);
-        }
-
-        // section 2.4.5 - parsing parameters
-        String stringParameters = null;
-        final int parametersIndex = parseUrl.lastIndexOf(";");
-        if (parametersIndex != -1) {
-            stringParameters = parseUrl.substring(parametersIndex);
-            parseUrl = parseUrl.substring(0, parametersIndex);
-        }
-
-        // section 2.4.6 - parse path
-        final List<String> tokens = new ArrayList<String>();
-        final String stringToTokenize;
-        if (parseUrl.trim().length() == 0) {
-            stringToTokenize = baseUrl.getPath();
-        }
-        else if (parseUrl.startsWith("/")) {
-            stringToTokenize = parseUrl;
-        }
-        else {
-            String path = baseUrl.getPath();
-            if (!path.endsWith("/") && parseUrl.length() != 0) {
-                path += "/..";
-            }
-            stringToTokenize = path + "/" + parseUrl;
-        }
-
-        final String pathToTokenize = stringToTokenize;
-        final StringTokenizer tokenizer = new StringTokenizer(pathToTokenize, "/");
-        while (tokenizer.hasMoreTokens()) {
-            tokens.add(tokenizer.nextToken());
-        }
-
-        for (int i = 0; i < tokens.size(); i++) {
-            final String oneToken = tokens.get(i);
-            if (oneToken.length() == 0 || oneToken.equals(".")) {
-                tokens.remove(i--);
-            }
-            else if (oneToken.equals("..")) {
-                tokens.remove(i--);
-                if (i >= 0) {
-                    tokens.remove(i--);
-                }
-            }
-        }
-
-        final StringBuilder buffer = new StringBuilder();
-        buffer.append(baseUrl.getProtocol());
-        buffer.append("://");
-        buffer.append(baseUrl.getHost());
-        final int port = baseUrl.getPort();
-        if (port != -1) {
-            buffer.append(":");
-            buffer.append(port);
-        }
-
-        for (final String token : tokens) {
-            buffer.append("/");
-            buffer.append(token);
-        }
-
-        if (pathToTokenize.endsWith("/")) {
-            buffer.append("/");
-        }
-
-        if (stringParameters != null) {
-            buffer.append(stringParameters);
-        }
-        if (stringQuery != null) {
-            buffer.append(stringQuery);
-        }
-        if (reference != null) {
-            buffer.append("#").append(reference);
-        }
-        final String newUrlString = buffer.toString();
-        return makeUrl(newUrlString);
+        return makeUrl(newUrl);
     }
 
     private WebResponse makeWebResponseForDataUrl(final WebRequestSettings webRequestSettings) throws IOException {

Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/StringUtils.java
===================================================================
--- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/StringUtils.java	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/StringUtils.java	2008-07-30 09:48:11 UTC (rev 3226)
@@ -20,6 +20,7 @@
  * @version $Revision$
  * @author Daniel Gredler
  * @author Ahmed Ashour
+ * @author Martin Tamme
  */
 public final class StringUtils {
 
@@ -58,4 +59,27 @@
         }
         return false;
     }
+
+    /**
+     * Returns the index within a given string of the first occurrence of
+     * the specified search character.
+     *
+     * @param s          a string.
+     * @param searchChar a search character.
+     * @param beginIndex the index to start the search from.
+     * @param endIndex   the index to stop the search.
+     * @return the index of the first occurrence of the character in the string or <tt>-1</tt>.
+     */
+    public static int indexOf(
+            final String s,
+            final char searchChar,
+            final int beginIndex,
+            final int endIndex) {
+        for (int i = beginIndex; i < endIndex; i++) {
+            if (s.charAt(i) == searchChar) {
+                return i;
+            }
+        }
+        return -1;
+    }
 }

Modified: trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java
===================================================================
--- trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/src/main/java/com/gargoylesoftware/htmlunit/util/UrlUtils.java	2008-07-30 09:48:11 UTC (rev 3226)
@@ -23,6 +23,8 @@
  *
  * @version $Revision$
  * @author Daniel Gredler
+ * @author Martin Tamme
+ * @author Sudhan Moghe
  */
 public final class UrlUtils {
 
@@ -135,4 +137,425 @@
         return url;
     }
 
+    /**
+     * Resolves a given relative URL against a base URL. See
+     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
+     * Section 4 for more details.
+     *
+     * @param baseUrl     The base URL in which to resolve the specification.
+     * @param relativeUrl The relative URL to resolve against the base URL.
+     * @return the resolved specification.
+     */
+    public static String resolveUrl(final String baseUrl, final String relativeUrl) {
+        if (baseUrl == null) {
+            throw new IllegalArgumentException("Base URL must not be null");
+        }
+        if (relativeUrl == null) {
+            throw new IllegalArgumentException("Relative URL must not be null");
+        }
+        final Url url = resolveUrl(parseUrl(baseUrl.trim()), relativeUrl.trim());
+
+        return url.toString();
+    }
+
+    /**
+     * Resolves a given relative URL against a base URL. See
+     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
+     * Section 4 for more details.
+     *
+     * @param baseUrl     The base URL in which to resolve the specification.
+     * @param relativeUrl The relative URL to resolve against the base URL.
+     * @return the resolved specification.
+     */
+    public static String resolveUrl(final URL baseUrl, final String relativeUrl) {
+        if (baseUrl == null) {
+            throw new IllegalArgumentException("Base URL must not be null");
+        }
+        return resolveUrl(baseUrl.toExternalForm(), relativeUrl);
+    }
+
+    /**
+     * Parses a given specification using the algorithm depicted in
+     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
+     *
+     * Section 2.4: Parsing a URL
+     *
+     *   An accepted method for parsing URLs is useful to clarify the
+     *   generic-RL syntax of Section 2.2 and to describe the algorithm for
+     *   resolving relative URLs presented in Section 4. This section
+     *   describes the parsing rules for breaking down a URL (relative or
+     *   absolute) into the component parts described in Section 2.1.  The
+     *   rules assume that the URL has already been separated from any
+     *   surrounding text and copied to a "parse string". The rules are
+     *   listed in the order in which they would be applied by the parser.
+     *
+     * @param spec The specification to parse.
+     * @return the parsed specification.
+     */
+    private static Url parseUrl(final String spec) {
+        final Url url = new Url();
+        int startIndex = 0;
+        int endIndex = spec.length();
+
+        // Section 2.4.1: Parsing the Fragment Identifier
+        //
+        //   If the parse string contains a crosshatch "#" character, then the
+        //   substring after the first (left-most) crosshatch "#" and up to the
+        //   end of the parse string is the <fragment> identifier. If the
+        //   crosshatch is the last character, or no crosshatch is present, then
+        //   the fragment identifier is empty. The matched substring, including
+        //   the crosshatch character, is removed from the parse string before
+        //   continuing.
+        //
+        //   Note that the fragment identifier is not considered part of the URL.
+        //   However, since it is often attached to the URL, parsers must be able
+        //   to recognize and set aside fragment identifiers as part of the
+        //   process.
+        final int crosshatchIndex = StringUtils.indexOf(spec, '#', startIndex, endIndex);
+
+        if (crosshatchIndex >= 0) {
+            url.fragment_ = spec.substring(crosshatchIndex + 1, endIndex);
+            endIndex = crosshatchIndex;
+        }
+        // Section 2.4.2: Parsing the Scheme
+        //
+        //   If the parse string contains a colon ":" after the first character
+        //   and before any characters not allowed as part of a scheme name (i.e.,
+        //   any not an alphanumeric, plus "+", period ".", or hyphen "-"), the
+        //   <scheme> of the URL is the substring of characters up to but not
+        //   including the first colon. These characters and the colon are then
+        //   removed from the parse string before continuing.
+        final int colonIndex = StringUtils.indexOf(spec, ':', startIndex, endIndex);
+
+        if (colonIndex > 0) {
+            final String scheme = spec.substring(startIndex, colonIndex);
+            if (isValidScheme(scheme)) {
+                url.scheme_ = scheme;
+                startIndex = colonIndex + 1;
+            }
+        }
+        // Section 2.4.3: Parsing the Network Location/Login
+        //
+        //   If the parse string begins with a double-slash "//", then the
+        //   substring of characters after the double-slash and up to, but not
+        //   including, the next slash "/" character is the network location/login
+        //   (<net_loc>) of the URL. If no trailing slash "/" is present, the
+        //   entire remaining parse string is assigned to <net_loc>. The double-
+        //   slash and <net_loc> are removed from the parse string before
+        //   continuing.
+        //
+        // Note: We also accept a question mark "?" or a semicolon ";" character as
+        //       delimiters for the network location/login (<net_loc>) of the URL.
+        final int locationStartIndex;
+        int locationEndIndex;
+
+        if (spec.startsWith("//", startIndex)) {
+            locationStartIndex = startIndex + 2;
+            locationEndIndex = StringUtils.indexOf(spec, '/', locationStartIndex, endIndex);
+            if (locationEndIndex >= 0) {
+                startIndex = locationEndIndex;
+            }
+        }
+        else {
+            locationStartIndex = -1;
+            locationEndIndex = -1;
+        }
+        // Section 2.4.4: Parsing the Query Information
+        //
+        //   If the parse string contains a question mark "?" character, then the
+        //   substring after the first (left-most) question mark "?" and up to the
+        //   end of the parse string is the <query> information. If the question
+        //   mark is the last character, or no question mark is present, then the
+        //   query information is empty. The matched substring, including the
+        //   question mark character, is removed from the parse string before
+        //   continuing.
+        final int questionMarkIndex = StringUtils.indexOf(spec, '?', startIndex, endIndex);
+
+        if (questionMarkIndex >= 0) {
+            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
+                // The substring of characters after the double-slash and up to, but not
+                // including, the question mark "?" character is the network location/login
+                // (<net_loc>) of the URL.
+                locationEndIndex = questionMarkIndex;
+                startIndex = questionMarkIndex;
+            }
+            url.query_ = spec.substring(questionMarkIndex + 1, endIndex);
+            endIndex = questionMarkIndex;
+        }
+        // Section 2.4.5: Parsing the Parameters
+        //
+        //   If the parse string contains a semicolon ";" character, then the
+        //   substring after the first (left-most) semicolon ";" and up to the end
+        //   of the parse string is the parameters (<params>). If the semicolon
+        //   is the last character, or no semicolon is present, then <params> is
+        //   empty. The matched substring, including the semicolon character, is
+        //   removed from the parse string before continuing.
+        final int semicolonIndex = StringUtils.indexOf(spec, ';', startIndex, endIndex);
+
+        if (semicolonIndex >= 0) {
+            if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
+                // The substring of characters after the double-slash and up to, but not
+                // including, the semicolon ";" character is the network location/login
+                // (<net_loc>) of the URL.
+                locationEndIndex = semicolonIndex;
+                startIndex = semicolonIndex;
+            }
+            url.parameters_ = spec.substring(semicolonIndex + 1, endIndex);
+            endIndex = semicolonIndex;
+        }
+        // Section 2.4.6: Parsing the Path
+        //
+        //   After the above steps, all that is left of the parse string is the
+        //   URL <path> and the slash "/" that may precede it. Even though the
+        //   initial slash is not part of the URL path, the parser must remember
+        //   whether or not it was present so that later processes can
+        //   differentiate between relative and absolute paths. Often this is
+        //   done by simply storing the preceding slash along with the path.
+        if ((locationStartIndex >= 0) && (locationEndIndex < 0)) {
+            // The entire remaining parse string is assigned to the network
+            // location/login (<net_loc>) of the URL.
+            locationEndIndex = endIndex;
+        }
+        else if (startIndex < endIndex) {
+            url.path_ = spec.substring(startIndex, endIndex);
+        }
+        // Set the network location/login (<net_loc>) of the URL.
+        if ((locationStartIndex >= 0) && (locationEndIndex >= 0)) {
+            url.location_ = spec.substring(locationStartIndex, locationEndIndex);
+        }
+        return url;
+    }
+
+    /*
+     * Returns true if specified string is a valid scheme name.
+     */
+    private static boolean isValidScheme(final String scheme) {
+        final int length = scheme.length();
+        if (length < 1) {
+            return false;
+        }
+        char c = scheme.charAt(0);
+        if (!Character.isLetter(c)) {
+            return false;
+        }
+        for (int i = 1; i < length; i++) {
+            c = scheme.charAt(i);
+            if (!Character.isLetterOrDigit(c) && c != '.' && c != '+' && c != '-') {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Resolves a given relative URL against a base URL using the algorithm
+     * depicted in <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>:
+     *
+     * Section 4: Resolving Relative URLs
+     *
+     *   This section describes an example algorithm for resolving URLs within
+     *   a context in which the URLs may be relative, such that the result is
+     *   always a URL in absolute form. Although this algorithm cannot
+     *   guarantee that the resulting URL will equal that intended by the
+     *   original author, it does guarantee that any valid URL (relative or
+     *   absolute) can be consistently transformed to an absolute form given a
+     *   valid base URL.
+     *
+     * @param baseUrl     The base URL in which to resolve the specification.
+     * @param relativeUrl The relative URL to resolve against the base URL.
+     * @return the resolved specification.
+     */
+    private static Url resolveUrl(final Url baseUrl, final String relativeUrl) {
+        final Url url = parseUrl(relativeUrl);
+        // Step 1: The base URL is established according to the rules of
+        //         Section 3.  If the base URL is the empty string (unknown),
+        //         the embedded URL is interpreted as an absolute URL and
+        //         we are done.
+        if (baseUrl == null) {
+            return url;
+        }
+        // Step 2: Both the base and embedded URLs are parsed into their
+        //         component parts as described in Section 2.4.
+        //      a) If the embedded URL is entirely empty, it inherits the
+        //         entire base URL (i.e., is set equal to the base URL)
+        //         and we are done.
+        if (relativeUrl.length() == 0) {
+            return new Url(baseUrl);
+        }
+        //      b) If the embedded URL starts with a scheme name, it is
+        //         interpreted as an absolute URL and we are done.
+        if (url.scheme_ != null) {
+            return url;
+        }
+        //      c) Otherwise, the embedded URL inherits the scheme of
+        //         the base URL.
+        url.scheme_ = baseUrl.scheme_;
+        // Step 3: If the embedded URL's <net_loc> is non-empty, we skip to
+        //         Step 7.  Otherwise, the embedded URL inherits the <net_loc>
+        //         (if any) of the base URL.
+        if (url.location_ != null) {
+            return url;
+        }
+        url.location_ = baseUrl.location_;
+        // Step 4: If the embedded URL path is preceded by a slash "/", the
+        //         path is not relative and we skip to Step 7.
+        if ((url.path_ != null) && url.path_.startsWith("/")) {
+            return url;
+        }
+        // Step 5: If the embedded URL path is empty (and not preceded by a
+        //         slash), then the embedded URL inherits the base URL path,
+        //         and
+        if (url.path_ == null) {
+            url.path_ = baseUrl.path_;
+            //  a) if the embedded URL's <params> is non-empty, we skip to
+            //     step 7; otherwise, it inherits the <params> of the base
+            //     URL (if any) and
+            if (url.parameters_ != null) {
+                return url;
+            }
+            url.parameters_ = baseUrl.parameters_;
+            //  b) if the embedded URL's <query> is non-empty, we skip to
+            //     step 7; otherwise, it inherits the <query> of the base
+            //     URL (if any) and we skip to step 7.
+            if (url.query_ != null) {
+                return url;
+            }
+            url.query_ = baseUrl.query_;
+            return url;
+        }
+        // Step 6: The last segment of the base URL's path (anything
+        //         following the rightmost slash "/", or the entire path if no
+        //         slash is present) is removed and the embedded URL's path is
+        //         appended in its place.  The following operations are
+        //         then applied, in order, to the new path:
+        final String basePath = baseUrl.path_;
+        String path = new String();
+
+        if (basePath != null) {
+            final int lastSlashIndex = basePath.lastIndexOf('/');
+
+            if (lastSlashIndex >= 0) {
+                path = basePath.substring(0, lastSlashIndex + 1);
+            }
+        }
+        else {
+            path = "/";
+        }
+        path = path.concat(url.path_);
+        //      a) All occurrences of "./", where "." is a complete path
+        //         segment, are removed.
+        int pathSegmentIndex;
+
+        while ((pathSegmentIndex = path.indexOf("/./")) >= 0) {
+            path = path.substring(0, pathSegmentIndex + 1).concat(path.substring(pathSegmentIndex + 3));
+        }
+        //      b) If the path ends with "." as a complete path segment,
+        //         that "." is removed.
+        if (path.endsWith("/.")) {
+            path = path.substring(0, path.length() - 1);
+        }
+        //      c) All occurrences of "<segment>/../", where <segment> is a
+        //         complete path segment not equal to "..", are removed.
+        //         Removal of these path segments is performed iteratively,
+        //         removing the leftmost matching pattern on each iteration,
+        //         until no matching pattern remains.
+        while ((pathSegmentIndex = path.indexOf("/../")) > 0) {
+            final String pathSegment = path.substring(0, pathSegmentIndex);
+            final int slashIndex = pathSegment.lastIndexOf('/');
+
+            if (slashIndex < 0) {
+                continue;
+            }
+            if (!pathSegment.substring(slashIndex).equals("..")) {
+                path = path.substring(0, slashIndex + 1).concat(path.substring(pathSegmentIndex + 4));
+            }
+        }
+        //      d) If the path ends with "<segment>/..", where <segment> is a
+        //         complete path segment not equal to "..", that
+        //         "<segment>/.." is removed.
+        if (path.endsWith("/..")) {
+            final String pathSegment = path.substring(0, path.length() - 3);
+            final int slashIndex = pathSegment.lastIndexOf('/');
+
+            if (slashIndex >= 0) {
+                path = path.substring(0, slashIndex + 1);
+            }
+        }
+        url.path_ = path;
+        // Step 7: The resulting URL components, including any inherited from
+        //         the base URL, are recombined to give the absolute form of
+        //         the embedded URL.
+        return url;
+    }
+
+    /**
+     * Class <tt>Url</tt> represents a Uniform Resource Locator.
+     *
+     * @author Martin Tamme
+     */
+    private static class Url {
+
+        private String scheme_;
+        private String location_;
+        private String path_;
+        private String parameters_;
+        private String query_;
+        private String fragment_;
+
+        /**
+         * Creates a <tt>Url</tt> object.
+         */
+        public Url() {
+        }
+
+        /**
+         * Creates a <tt>Url</tt> object from the specified
+         * <tt>Url</tt> object.
+         *
+         * @param url a <tt>Url</tt> object.
+         */
+        public Url(final Url url) {
+            scheme_ = url.scheme_;
+            location_ = url.location_;
+            path_ = url.path_;
+            parameters_ = url.parameters_;
+            query_ = url.query_;
+            fragment_ = url.fragment_;
+        }
+
+        /**
+         * Returns a string representation of the <tt>Url</tt> object.
+         *
+         * @return a string representation of the <tt>Url</tt> object.
+         */
+        @Override
+        public String toString() {
+            final StringBuilder sb = new StringBuilder();
+
+            if (scheme_ != null) {
+                sb.append(scheme_);
+                sb.append(':');
+            }
+            if (location_ != null) {
+                sb.append("//");
+                sb.append(location_);
+            }
+            if (path_ != null) {
+                sb.append(path_);
+            }
+            if (parameters_ != null) {
+                sb.append(';');
+                sb.append(parameters_);
+            }
+            if (query_ != null) {
+                sb.append('?');
+                sb.append(query_);
+            }
+            if (fragment_ != null) {
+                sb.append('#');
+                sb.append(fragment_);
+            }
+            return sb.toString();
+        }
+    }
 }

Modified: trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java
===================================================================
--- trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java	2008-07-26 07:38:39 UTC (rev 3225)
+++ trunk/htmlunit/src/test/java/com/gargoylesoftware/htmlunit/util/UrlUtilsTest.java	2008-07-30 09:48:11 UTC (rev 3226)
@@ -25,6 +25,8 @@
  *
  * @version $Revision$
  * @author Daniel Gredler
+ * @author Martin Tamme
+ * @author Sudhan Moghe
  */
 public class UrlUtilsTest extends WebTestCase {
 
@@ -96,4 +98,80 @@
         assertEquals("http://my.home.com/index.html?xyz#ref", b.toExternalForm());
     }
 
+    /**
+     * Test {@link UrlUtils#resolveUrl(String, String)} with the normal examples taken from
+     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a> Section 5.1.
+     */
+    @Test
+    public void resolveUrlWithNormalExamples() {
+        final String baseUrl = "http://a/b/c/d;p?q#f";
+
+        assertEquals("g:h",                  UrlUtils.resolveUrl(baseUrl, "g:h"));
+        assertEquals("http://a/b/c/g",       UrlUtils.resolveUrl(baseUrl, "g"));
+        assertEquals("http://a/b/c/g",       UrlUtils.resolveUrl(baseUrl, "./g"));
+        assertEquals("http://a/b/c/g/",      UrlUtils.resolveUrl(baseUrl, "g/"));
+        assertEquals("http://a/g",           UrlUtils.resolveUrl(baseUrl, "/g"));
+        assertEquals("http://g",             UrlUtils.resolveUrl(baseUrl, "//g"));
+        assertEquals("http://a/b/c/d;p?y",   UrlUtils.resolveUrl(baseUrl, "?y"));
+        assertEquals("http://a/b/c/g?y",     UrlUtils.resolveUrl(baseUrl, "g?y"));
+        assertEquals("http://a/b/c/g?y/./x", UrlUtils.resolveUrl(baseUrl, "g?y/./x"));
+        assertEquals("http://a/b/c/d;p?q#s", UrlUtils.resolveUrl(baseUrl, "#s"));
+        assertEquals("http://a/b/c/g#s",     UrlUtils.resolveUrl(baseUrl, "g#s"));
+        assertEquals("http://a/b/c/g#s/./x", UrlUtils.resolveUrl(baseUrl, "g#s/./x"));
+        assertEquals("http://a/b/c/g?y#s",   UrlUtils.resolveUrl(baseUrl, "g?y#s"));
+        assertEquals("http://a/b/c/d;x",     UrlUtils.resolveUrl(baseUrl, ";x"));
+        assertEquals("http://a/b/c/g;x",     UrlUtils.resolveUrl(baseUrl, "g;x"));
+        assertEquals("http://a/b/c/g;x?y#s", UrlUtils.resolveUrl(baseUrl, "g;x?y#s"));
+        assertEquals("http://a/b/c/",        UrlUtils.resolveUrl(baseUrl, "."));
+        assertEquals("http://a/b/c/",        UrlUtils.resolveUrl(baseUrl, "./"));
+        assertEquals("http://a/b/",          UrlUtils.resolveUrl(baseUrl, ".."));
+        assertEquals("http://a/b/",          UrlUtils.resolveUrl(baseUrl, "../"));
+        assertEquals("http://a/b/g",         UrlUtils.resolveUrl(baseUrl, "../g"));
+        assertEquals("http://a/",            UrlUtils.resolveUrl(baseUrl, "../.."));
+        assertEquals("http://a/",            UrlUtils.resolveUrl(baseUrl, "../../"));
+        assertEquals("http://a/g",           UrlUtils.resolveUrl(baseUrl, "../../g"));
+
+        //Following two cases were failing when original implementation was modified to handle
+        //the cases given in RFC 1808. Lots of other test cases failed because of that.
+        assertEquals(URL_FIRST + "/foo.xml", UrlUtils.resolveUrl(URL_FIRST, "/foo.xml"));
+        assertEquals(URL_FIRST + "/foo.xml", UrlUtils.resolveUrl(URL_FIRST, "foo.xml"));
+    }
+
+    /**
+     * Test {@link UrlUtils#resolveUrl(String, String)} with the abnormal examples taken from
+     * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a> Section 5.2.
+     */
+    @Test
+    public void resolveUrlWithAbnormalExamples() {
+        final String baseUrl = "http://a/b/c/d;p?q#f";
+
+        assertEquals("http://a/b/c/d;p?q#f", UrlUtils.resolveUrl(baseUrl, ""));
+        assertEquals("http://a/../g",        UrlUtils.resolveUrl(baseUrl, "../../../g"));
+        assertEquals("http://a/../../g",     UrlUtils.resolveUrl(baseUrl, "../../../../g"));
+        assertEquals("http://a/./g",         UrlUtils.resolveUrl(baseUrl, "/./g"));
+        assertEquals("http://a/../g",        UrlUtils.resolveUrl(baseUrl, "/../g"));
+        assertEquals("http://a/b/c/g.",      UrlUtils.resolveUrl(baseUrl, "g."));
+        assertEquals("http://a/b/c/.g",      UrlUtils.resolveUrl(baseUrl, ".g"));
+        assertEquals("http://a/b/c/g..",     UrlUtils.resolveUrl(baseUrl, "g.."));
+        assertEquals("http://a/b/c/..g",     UrlUtils.resolveUrl(baseUrl, "..g"));
+        assertEquals("http://a/b/g",         UrlUtils.resolveUrl(baseUrl, "./../g"));
+        assertEquals("http://a/b/c/g/",      UrlUtils.resolveUrl(baseUrl, "./g/."));
+        assertEquals("http://a/b/c/g/h",     UrlUtils.resolveUrl(baseUrl, "g/./h"));
+        assertEquals("http://a/b/c/h",       UrlUtils.resolveUrl(baseUrl, "g/../h"));
+        assertEquals("http:g",               UrlUtils.resolveUrl(baseUrl, "http:g"));
+        assertEquals("http:",                UrlUtils.resolveUrl(baseUrl, "http:"));
+    }
+
+    /**
+     * Test {@link UrlUtils#resolveUrl(String, String)} with extra examples.
+     */
+    @Test
+    public void resolveUrlWithExtraExamples() {
+        final String baseUrl = "http://a/b/c/d;p?q#f";
+
+        assertEquals("http://a/b/c/d;",      UrlUtils.resolveUrl(baseUrl, ";"));
+        assertEquals("http://a/b/c/d;p?",    UrlUtils.resolveUrl(baseUrl, "?"));
+        assertEquals("http://a/b/c/d;p?q#",  UrlUtils.resolveUrl(baseUrl, "#"));
+        assertEquals("http://a/b/c/d;p?q#s", UrlUtils.resolveUrl(baseUrl, "#s"));
+    }
 }


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




[HtmlUnit] SF.net SVN: htmlunit:[3226] trunk/htmlunit

Java GUI-Less browser, supporting JavaScript, to run against web pages

[HtmlUnit] SF.net SVN: htmlunit:[3226] trunk/htmlunit