[Jsimplebrowser-svn] SF.net SVN: jsimplebrowser: [37] trunk
Status: Alpha
Brought to you by:
rdimarco
From: <rdi...@us...> - 2007-08-10 15:46:44
|
Revision: 37 http://jsimplebrowser.svn.sourceforge.net/jsimplebrowser/?rev=37&view=rev Author: rdimarco Date: 2007-08-10 08:46:42 -0700 (Fri, 10 Aug 2007) Log Message: ----------- Moved from JTidy to CyberNeko for better parsing. Modified Paths: -------------- trunk/core/pom.xml trunk/core/src/main/java/com/software416/jsimplebrowser/Browser.java trunk/pom.xml Added Paths: ----------- trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserRuntimeException.java trunk/core/src/main/java/com/software416/jsimplebrowser/History.java trunk/core/src/main/java/com/software416/jsimplebrowser/RequestInfo.java trunk/core/src/main/java/com/software416/jsimplebrowser/Window.java trunk/core/src/main/java/com/software416/jsimplebrowser/impl/ trunk/core/src/main/java/com/software416/jsimplebrowser/impl/BrowserImpl.java trunk/core/src/main/java/com/software416/jsimplebrowser/impl/RequestInfoImpl.java trunk/core/src/main/java/com/software416/jsimplebrowser/impl/WindowImpl.java trunk/core/src/main/java/com/software416/jsimplebrowser/util/ trunk/core/src/main/java/com/software416/jsimplebrowser/util/BrowserHelper.java trunk/core/src/main/java/com/software416/jsimplebrowser/util/HtmlToDomConverter.java trunk/core/src/test/java/com/software416/jsimplebrowser/impl/ trunk/core/src/test/java/com/software416/jsimplebrowser/impl/BrowserTest.java trunk/core/src/test/java/com/software416/jsimplebrowser/util/ trunk/core/src/test/java/com/software416/jsimplebrowser/util/BrowserHelperTest.java trunk/core/src/test/resources/badlyFormattedPage.html trunk/core/src/test/resources/formTest1.html trunk/core/src/test/resources/linkTest1.html Removed Paths: ------------- trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserHelper.java trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserWindow.java trunk/core/src/main/java/com/software416/jsimplebrowser/HtmlToDomConverter.java trunk/core/src/main/java/com/software416/jsimplebrowser/SimpleBrowser.java 
trunk/core/src/test/java/com/software416/jsimplebrowser/BrowserTest.java Property Changed: ---------------- trunk/core/ Property changes on: trunk/core ___________________________________________________________________ Name: svn:ignore - .settings + .settings target Modified: trunk/core/pom.xml =================================================================== --- trunk/core/pom.xml 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/pom.xml 2007-08-10 15:46:42 UTC (rev 37) @@ -19,6 +19,10 @@ <artifactId>commons-io</artifactId> </dependency> <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + </dependency> + <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging-api</artifactId> <scope>provided</scope> @@ -33,9 +37,15 @@ <artifactId>commons-collections</artifactId> </dependency> <dependency> - <groupId>org.hibernate</groupId> + <groupId>org.cyberneko</groupId> + <artifactId>html-core</artifactId> + </dependency> +<!-- + <dependency> + <groupId>jtidy</groupId> <artifactId>jtidy</artifactId> </dependency> +--> <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> Modified: trunk/core/src/main/java/com/software416/jsimplebrowser/Browser.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/Browser.java 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/Browser.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,27 +1,9 @@ package com.software416.jsimplebrowser; -import java.util.Map; - -import org.apache.commons.collections.MultiMap; -import org.w3c.dom.Document; - public interface Browser { - - public void go(String url) throws BrowserException; - - public void submitForm(String formName, - Map<String, String> userSpecifiedParameterMap) throws BrowserException; - - public void makeRequestWithBody(String link, String body) throws BrowserException; - - public MultiMap 
getCookies(); - - public String getCookieValue(String cookieName); - - public String getResponseHtml(); - - public Document getResponseDocument(); - - public int getResponseCode(); - + public static final String MAIN_BROWSER_WINDOW_NAME = "main"; + public void open(String url) throws BrowserException; + public void open(String url, String window) throws BrowserException; + public void makeXmlHttpRequest(String url, String requestBody) throws BrowserException; + public Window getWindow(String windowName); } Deleted: trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserHelper.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserHelper.java 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserHelper.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,46 +0,0 @@ -package com.software416.jsimplebrowser; - -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; - -public class BrowserHelper { - private static final Log LOG = LogFactory.getLog(BrowserHelper.class); - private Browser _browser; - public BrowserHelper(Browser browser) { - _browser = browser; - } - - public String getFirstLinkForTextRegex(String regex) { - List<String> links = getLinksForTextRegex(regex, 1); - return links.size() == 0 ? 
null : links.get(0); - } - - public List<String> getLinksForTextRegex(String regex) { - return getLinksForTextRegex(regex, Integer.MAX_VALUE); - } - - public List<String> getLinksForTextRegex(String regex, int maxResults) { - List<String> rv = new ArrayList<String>(); - NodeList nl = _browser.getResponseDocument().getElementsByTagName("a"); - Pattern p = Pattern.compile(".*" + regex + ".*", Pattern.CASE_INSENSITIVE); - for (int x = 0; x < nl.getLength() && rv.size() < maxResults; x++) { - Element element = (Element)nl.item(x); - String href = element.getAttribute("href"); - if (LOG.isDebugEnabled()) { - LOG.debug("matching " + regex + " against content " + element.getTextContent()); - } - if (href != null && element.getTextContent() != null && p.matcher(element.getTextContent()).matches()) { - rv.add(href); - } - } - return rv; - } - - -} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserRuntimeException.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserRuntimeException.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserRuntimeException.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,21 @@ +package com.software416.jsimplebrowser; + +public class BrowserRuntimeException extends RuntimeException { + private static final long serialVersionUID = 20070808L; + public BrowserRuntimeException() { + // Support no arg call + } + + public BrowserRuntimeException(String message) { + super(message); + } + + public BrowserRuntimeException(Throwable cause) { + super(cause); + } + + public BrowserRuntimeException(String message, Throwable cause) { + super(message, cause); + } + +} Deleted: trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserWindow.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserWindow.java 2007-08-07 21:56:12 UTC (rev 36) +++ 
trunk/core/src/main/java/com/software416/jsimplebrowser/BrowserWindow.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,5 +0,0 @@ -package com.software416.jsimplebrowser; - -public class BrowserWindow { - -} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/History.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/History.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/History.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,5 @@ +package com.software416.jsimplebrowser; + +public interface History { + // Empty for now... +} Deleted: trunk/core/src/main/java/com/software416/jsimplebrowser/HtmlToDomConverter.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/HtmlToDomConverter.java 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/HtmlToDomConverter.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,51 +0,0 @@ -package com.software416.jsimplebrowser; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.w3c.dom.Document; -import org.w3c.tidy.Tidy; -import org.xml.sax.SAXException; - -public class HtmlToDomConverter { - private static final Log LOG = LogFactory.getLog(HtmlToDomConverter.class); - private DocumentBuilder _builder; - public HtmlToDomConverter() { - try { - _builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); - } catch (ParserConfigurationException ex) { - throw new RuntimeException("Weird problem creating a document builder", ex); - } - } - - public Document getDocumentFromHtml(byte[] docBytes) throws 
HtmlParseException { - Tidy t = new Tidy(); - t.setXHTML(true); - t.setXmlOut(true); - t.setXmlPi(true); - t.setXmlPIs(true); - t.setQuiet(true); - t.setShowErrors(0); - t.setShowWarnings(false); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - try { - LOG.debug("Tidying input"); - t.parse(new ByteArrayInputStream(docBytes), out); - LOG.debug("Parsing tidied file"); - Document rv = _builder.parse(new ByteArrayInputStream(out.toByteArray())); - LOG.debug("Done with parsing"); - return rv; - } catch (SAXException ex) { - throw new HtmlParseException(ex); - } catch (IOException ex) { - throw new HtmlParseException(ex); - } - } -} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/RequestInfo.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/RequestInfo.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/RequestInfo.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,8 @@ +package com.software416.jsimplebrowser; + +import org.apache.commons.collections.MultiMap; + +public interface RequestInfo { + public int getResponseCode(); + public MultiMap getCookies(); +} Deleted: trunk/core/src/main/java/com/software416/jsimplebrowser/SimpleBrowser.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/SimpleBrowser.java 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/SimpleBrowser.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,221 +0,0 @@ -package com.software416.jsimplebrowser; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.Collection; -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.commons.collections.MultiHashMap; -import org.apache.commons.collections.MultiMap; -import 
org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PostMethod; -import org.apache.commons.httpclient.methods.StringRequestEntity; -import org.apache.commons.io.IOUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; - -public class SimpleBrowser implements Browser { - @SuppressWarnings("unused") - private static final Log LOG = LogFactory.getLog(SimpleBrowser.class); - private HttpClient _client = new HttpClient(); - private HttpMethod _requestMethod; - private Document _responseDocument; - private byte[] _responseBody; - private int _responseCode; - private HtmlToDomConverter _converter = new HtmlToDomConverter(); - - public void go(String url) throws BrowserException{ - try { - GetMethod gm; - if (_requestMethod != null) { - gm = new GetMethod(); - gm.setURI(new URI(_requestMethod.getURI(), url, true)); - } else { - gm = new GetMethod(url); - } - makeRequest(gm); - } catch (IOException ex) { - throw new BrowserException(ex); - } - } - - public void submitForm(String formName, Map<String, String> userSpecifiedParameterMap) throws BrowserException { - try { - HttpMethod m = buildMethodForForm(formName, userSpecifiedParameterMap); - if (m == null) { - throw new BrowserException("Could not submit form as it does not exist!!"); - } - makeRequest(m); - } catch (IOException ex) { - throw new BrowserException(ex); - } - } - - public void makeRequestWithBody(String link, String body) throws BrowserException{ - PostMethod m = new PostMethod(link); - try { - 
m.setRequestEntity(new StringRequestEntity(body, "text/plain",null)); - makeRequest(m); - } catch (IOException ex) { - throw new BrowserException(ex); - } - } - - public MultiMap getCookies() { - MultiHashMap rv = new MultiHashMap(); - Header[] headers = _requestMethod.getRequestHeaders("Cookie"); - for (Header header : headers) { - String[] vals = header.getValue().split("\\s*;\\s*"); - for (String cookie : vals) { - int idx = cookie.indexOf("="); - if (idx > 0 && idx < cookie.length() - 2) { - rv.put(cookie.substring(0, idx), cookie.substring(idx + 1)); - } - } - } - return rv; - } - - public String getCookieValue(String cookieName) { - Object rv = getCookies().get(cookieName); - return rv == null ? "" : rv instanceof String ? rv.toString() : ((Collection)rv).iterator().next().toString(); - } - - protected synchronized void makeRequest(HttpMethod m) throws HttpException, IOException, BrowserException { - _requestMethod = m; - _responseCode = _client.executeMethod(_requestMethod); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(_requestMethod.getResponseBodyAsStream(), bos); - setResponseBody(bos.toByteArray()); - String link = findRefreshLink(); - if (link != null) { - go(link); - } - } - - protected String findRefreshLink() { - NodeList meta = _responseDocument.getElementsByTagName("meta"); - for (int x = 0; x < meta.getLength(); x++) { - Element e = (Element)meta.item(x); - if (e.getAttribute("http-equiv") != null && e.getAttribute("http-equiv") != null && e.getAttribute("http-equiv").equalsIgnoreCase("refresh")) { - String content = e.getAttribute("content"); - if (content != null) { - Matcher m = Pattern.compile(".*URL=([^;]+).*").matcher(content); - if (m.matches()) { - return m.group(1); - } - } - } - } - return null; - } - - @SuppressWarnings("unchecked") - protected HttpMethod buildMethodForForm(String formName, Map<String, String> userParamMap) throws URIException { - Element formElement = findFormElementByFormName(formName); - 
if (formElement == null) { - return null; - } - - String formMethod = formElement.getAttribute("method"); - boolean usePost = formMethod != null && formMethod.toUpperCase().equals("POST"); - HttpMethod rv = usePost ? new PostMethod() : new GetMethod(); -// Iterator formFieldIterator = formElement.findFormFields().iterator(); - Map paramsToUse = new MultiHashMap(); -// while (formFieldIterator.hasNext()) { -// FormField f = (FormField)formFieldIterator.next(); -// Collection c = f.getValues(); -// Iterator formFieldValueIterator = c.iterator(); -// while (formFieldValueIterator.hasNext()) { -// paramsToUse.put(f.getName(), new NameValuePair(f.getName(), formFieldValueIterator.next().toString())); -// } -// } - - for (Entry<String, String> e : userParamMap.entrySet()) { - paramsToUse.remove(e.getKey()); - paramsToUse.put(e.getKey(), new NameValuePair(e.getKey(), e.getValue())); - } - - rv.setURI(new URI(_requestMethod.getURI(), new URI(formElement.getAttribute("action"), true))); - - Collection params = paramsToUse.values(); - if (usePost) { - ((PostMethod)rv).setRequestBody((NameValuePair[])params.toArray(new NameValuePair[params.size()])); - } else { - Iterator i = params.iterator(); - StringBuilder sb = new StringBuilder(); - if (rv.getURI().getQuery() != null) { - sb.append(rv.getURI().getQuery()); - } - while (i.hasNext()) { - if (sb.length() > 0) { - sb.append("&"); - } - NameValuePair pair = (NameValuePair)i.next(); - sb.append(pair.getName() + "=" + pair.getValue()); - } - try { - URI newUri = (URI)rv.getURI().clone(); - newUri.setQuery(sb.toString()); - rv.setURI(newUri); - } catch (CloneNotSupportedException ex) { - // Should never happen! 
- throw new RuntimeException(ex); - } - } - - return rv; - } - - protected Element findFormElementByFormName(String name) { - NodeList formElements=_responseDocument.getElementsByTagName("form"); - for (int i = 0; i < formElements.getLength(); i++) { - Element formElement=(Element)formElements.item(i); - String formName=formElement.getAttribute("name"); - if (name.equals(formName)) { - return formElement; - } - } - return null; - } - - - public String getResponseHtml() { - return new String(_responseBody); - } - - public int getResponseCode() { - return _responseCode; - } - - protected void setRequestMethod(HttpMethod m) { - _requestMethod = m; - } - protected void setResponseBody(String responseBody) throws HtmlParseException { - setResponseBody(responseBody.getBytes()); - } - - protected void setResponseBody(byte[] responseBody) throws HtmlParseException { - _responseBody = responseBody; - _responseDocument = _converter.getDocumentFromHtml(responseBody); - } - public Document getResponseDocument() { - return _responseDocument; - } - - -} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/Window.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/Window.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/Window.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,14 @@ +package com.software416.jsimplebrowser; + +import org.apache.commons.collections.MultiMap; +import org.w3c.dom.Document; + +public interface Window { + public void open(String url) throws BrowserException; + public void submitForm(String formName, MultiMap parameters) throws BrowserException; + public RequestInfo getRequestInfo(); + public Document getDocument(); + public String getSource(); + public History getHistory(); + public String getLocation(); +} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/impl/BrowserImpl.java 
=================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/impl/BrowserImpl.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/impl/BrowserImpl.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,73 @@ +package com.software416.jsimplebrowser.impl; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.methods.PostMethod; +import org.apache.commons.httpclient.methods.StringRequestEntity; +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.software416.jsimplebrowser.Browser; +import com.software416.jsimplebrowser.BrowserException; +import com.software416.jsimplebrowser.Window; + +public class BrowserImpl implements Browser { + @SuppressWarnings("unused") + private static final Log LOG = LogFactory.getLog(BrowserImpl.class); + + private HttpClient _client = new HttpClient(); + private Map<String, WindowImpl> _browserWindows = new HashMap<String, WindowImpl>(); + + public BrowserImpl() { + _browserWindows.put(MAIN_BROWSER_WINDOW_NAME, new WindowImpl(this)); + } + + protected synchronized RequestInfoImpl makeRequest(HttpMethod m) throws BrowserException { + RequestInfoImpl ri = new RequestInfoImpl(); + ri.setRequestMethod(m); + + try { + ri.setResponseCode(_client.executeMethod(m)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(m.getResponseBodyAsStream(), bos); + ri.setResponseBody(bos.toByteArray()); + } catch (HttpException ex) { + throw new BrowserException(ex); + } catch (IOException ex) { + throw new BrowserException(ex); + } + return ri; + } + + public void makeXmlHttpRequest(String url, String requestBody) 
throws BrowserException { + PostMethod m = new PostMethod(url); + try { + m.setRequestEntity(new StringRequestEntity(requestBody, "text/plain",null)); + makeRequest(m); + } catch (IOException ex) { + throw new BrowserException(ex); + } + } + + public void open(String url) throws BrowserException { + open(url, MAIN_BROWSER_WINDOW_NAME); + } + + public void open(String url, String window) throws BrowserException { + getWindow(window).open(url); + } + + public synchronized Window getWindow(String windowName) { + if (!_browserWindows.containsKey(windowName)) { + _browserWindows.put(windowName, new WindowImpl(this)); + } + return _browserWindows.get(windowName); + } +} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/impl/RequestInfoImpl.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/impl/RequestInfoImpl.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/impl/RequestInfoImpl.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,57 @@ +package com.software416.jsimplebrowser.impl; + +import java.util.Collection; + +import org.apache.commons.collections.MultiHashMap; +import org.apache.commons.collections.MultiMap; +import org.apache.commons.httpclient.Header; +import org.apache.commons.httpclient.HttpMethod; + +import com.software416.jsimplebrowser.RequestInfo; + +public class RequestInfoImpl implements RequestInfo{ + private HttpMethod _requestMethod; + private int _responseCode; + private byte[] _responseBody; + + + public MultiMap getCookies() { + MultiHashMap rv = new MultiHashMap(); + Header[] headers = _requestMethod.getRequestHeaders("Cookie"); + for (Header header : headers) { + String[] vals = header.getValue().split("\\s*;\\s*"); + for (String cookie : vals) { + int idx = cookie.indexOf("="); + if (idx > 0 && idx < cookie.length() - 2) { + rv.put(cookie.substring(0, idx), cookie.substring(idx + 1)); + } + } + } + return rv; + } + + public String 
getCookieValue(String cookieName) { + Object rv = getCookies().get(cookieName); + return rv == null ? "" : rv instanceof String ? rv.toString() : ((Collection)rv).iterator().next().toString(); + } + + public byte[] getResponseBody() { + return _responseBody; + } + public void setResponseBody(byte[] responseBody) { + _responseBody = responseBody; + } + public HttpMethod getRequestMethod() { + return _requestMethod; + } + public void setRequestMethod(HttpMethod requestMethod) { + _requestMethod = requestMethod; + } + public int getResponseCode() { + return _responseCode; + } + public void setResponseCode(int responseCode) { + _responseCode = responseCode; + } + +} Added: trunk/core/src/main/java/com/software416/jsimplebrowser/impl/WindowImpl.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/impl/WindowImpl.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/impl/WindowImpl.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,264 @@ +package com.software416.jsimplebrowser.impl; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.collections.MultiHashMap; +import org.apache.commons.collections.MultiMap; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.NameValuePair; +import org.apache.commons.httpclient.URI; +import org.apache.commons.httpclient.URIException; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.methods.PostMethod; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; + +import com.software416.jsimplebrowser.BrowserException; +import com.software416.jsimplebrowser.BrowserRuntimeException; +import com.software416.jsimplebrowser.History; +import com.software416.jsimplebrowser.HtmlParseException; +import 
com.software416.jsimplebrowser.RequestInfo; +import com.software416.jsimplebrowser.Window; +import com.software416.jsimplebrowser.util.HtmlToDomConverter; + +public class WindowImpl implements Window { + private RequestInfoImpl _requestInfo; + private Document _responseDocument; + private BrowserImpl _browser; + private HtmlToDomConverter _converter = new HtmlToDomConverter(); + private boolean _doFollowRedirects = true; + + public WindowImpl(BrowserImpl b) { + _browser = b; + } + + public void open(String url) throws BrowserException { + try { + GetMethod gm; + if (_requestInfo != null) { + gm = new GetMethod(); + gm.setURI(new URI(_requestInfo.getRequestMethod().getURI(), url, true)); + } else { + gm = new GetMethod(url); + } + makeRequest(gm); + } catch (IOException ex) { + throw new BrowserException(ex); + } + } + + protected void makeRequest(HttpMethod hm) throws BrowserException { + handleResponse(_browser.makeRequest(hm)); + } + + public void handleResponse(RequestInfoImpl requestInfo) throws BrowserException { + _requestInfo = requestInfo; + _responseDocument = null; + if (_doFollowRedirects) { + String link = findRefreshLink(); + if (link != null) { + open(link); + } + } + } + + public void submitForm(String formName, MultiMap userSpecifiedParameterMap) throws BrowserException { + try { + HttpMethod m = buildMethodForForm(formName, userSpecifiedParameterMap); + if (m == null) { + throw new BrowserException("Could not submit form as it does not exist!!"); + } + makeRequest(m); + } catch (IOException ex) { + throw new BrowserException(ex); + } + } + + protected String findRefreshLink() { + NodeList meta = getDocument().getElementsByTagName("meta"); + for (int x = 0; x < meta.getLength(); x++) { + Element e = (Element)meta.item(x); + if (e.getAttribute("http-equiv") != null && e.getAttribute("http-equiv") != null && e.getAttribute("http-equiv").equalsIgnoreCase("refresh")) { + String content = e.getAttribute("content"); + if (content != null) { + Matcher m = 
Pattern.compile(".*URL=([^;]+).*").matcher(content); + if (m.matches()) { + return m.group(1); + } + } + } + } + return null; + } + + @SuppressWarnings("unchecked") + protected HttpMethod buildMethodForForm(String formName, MultiMap userParamMap) throws URIException { + Element formElement = findFormElementByFormName(formName); + if (formElement == null) { + return null; + } + + MultiMap formParameters = populateMapWithDefaultValuesFromForm(formElement); + + mergeFormParameters(userParamMap, formParameters); + + String formMethod = formElement.getAttribute("method"); + boolean usePost = formMethod != null && formMethod.toUpperCase().equals("POST"); + HttpMethod rv = usePost ? new PostMethod() : new GetMethod(); + + rv.setURI(new URI(_requestInfo.getRequestMethod().getURI(), new URI(formElement.getAttribute("action"), true))); + + Collection params = formParameters.values(); + if (usePost) { + ((PostMethod)rv).setRequestBody((NameValuePair[])params.toArray(new NameValuePair[params.size()])); + } else { + Iterator i = params.iterator(); + StringBuilder sb = new StringBuilder(); + if (rv.getURI().getQuery() != null) { + sb.append(rv.getURI().getQuery()); + } + while (i.hasNext()) { + if (sb.length() > 0) { + sb.append("&"); + } + NameValuePair pair = (NameValuePair)i.next(); + sb.append(pair.getName() + "=" + pair.getValue()); + } + try { + URI newUri = (URI)rv.getURI().clone(); + newUri.setQuery(sb.toString()); + rv.setURI(newUri); + } catch (CloneNotSupportedException ex) { + // Should never happen! 
+ throw new RuntimeException(ex); + } + } + + return rv; + } + + private MultiMap populateMapWithDefaultValuesFromForm(Element formElement) { + MultiMap formParameters = new MultiHashMap(); + populateFormParametersFromInputTags(formElement, formParameters); + populateFormParametersFromSelectLists(formElement, formParameters); + return formParameters; + } + + private void populateFormParametersFromSelectLists(Element formElement, MultiMap formParameters) { + NodeList nodes; + nodes = formElement.getElementsByTagName("select"); + for (int x = 0; x < nodes.getLength(); x++) { + Element e = (Element)nodes.item(x); + String name = e.getAttribute("name"); + if (name != null) { + NodeList options = e.getElementsByTagName("option"); + for (int y = 0; y < options.getLength(); y++) { + Element option = (Element)options.item(y); + if (option.hasAttribute("selected") && !option.getAttribute("selected").equalsIgnoreCase("false")) { + String value = option.getAttribute("value"); + if (value == null||value.equals("")) { + value = option.getTextContent(); + } + formParameters.put(name, new NameValuePair(name,value)); + } + } + } + } + } + + private void populateFormParametersFromInputTags(Element formElement, MultiMap formParameters) { + NodeList nodes = formElement.getElementsByTagName("input"); + for (int x = 0; x < nodes.getLength(); x++) { + Element e = (Element)nodes.item(x); + String name = e.getAttribute("name"); + String type = e.getAttribute("type"); + if (name != null) { + if (type == null || type.equalsIgnoreCase("text") || type.equalsIgnoreCase("password") || type.equalsIgnoreCase("hidden")) { + formParameters.put(name, new NameValuePair(name,e.getAttribute("value"))); + } else if (type.equalsIgnoreCase("checkbox") || type.equalsIgnoreCase("radio")) { + if (e.hasAttribute("checked") && !e.getAttribute("checked").equalsIgnoreCase("false")) { + formParameters.put(name, new NameValuePair(name, e.getAttribute("value"))); + } + } + } + } + } + + private void 
mergeFormParameters(MultiMap userParamMap, MultiMap formParameters) { + Iterator userParamIterator = userParamMap.keySet().iterator(); + while (userParamIterator.hasNext()) { + String key = userParamIterator.next().toString(); + formParameters.remove(key); + Collection c = (Collection)userParamMap.get(key); + Iterator i2 = c.iterator(); + while (i2.hasNext()) { + formParameters.put(key, new NameValuePair(key, i2.next().toString())); + } + } + } + + protected Element findFormElementByFormName(String name) { + NodeList formElements=getDocument().getElementsByTagName("form"); + for (int i = 0; i < formElements.getLength(); i++) { + Element formElement=(Element)formElements.item(i); + String formName=formElement.getAttribute("name"); + if (name.equals(formName)) { + return formElement; + } + } + return null; + } + + + public String getSource() { + return new String(_requestInfo.getResponseBody()); + } + + public Document getDocument() { + if (_responseDocument == null) { + try { + _responseDocument = _converter.getDocumentFromHtml(_requestInfo.getResponseBody()); + } catch (HtmlParseException ex) { + throw new BrowserRuntimeException(ex); + } + } + return _responseDocument; + } + + public History getHistory() { + // TODO Auto-generated method stub + return null; + } + + public String getLocation() { + try { + return _requestInfo.getRequestMethod().getURI().toString(); + } catch (URIException ex) { + throw new BrowserRuntimeException(ex); + } + } + + public RequestInfo getRequestInfo() { + return _requestInfo; + } + + protected int getResponseCode() { + return _requestInfo.getResponseCode(); + } + + public boolean isDoFollowRedirects() { + return _doFollowRedirects; + } + + public void setDoFollowRedirects(boolean doFollowRedirects) { + _doFollowRedirects = doFollowRedirects; + } + +} + + Added: trunk/core/src/main/java/com/software416/jsimplebrowser/util/BrowserHelper.java =================================================================== --- 
trunk/core/src/main/java/com/software416/jsimplebrowser/util/BrowserHelper.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/util/BrowserHelper.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,51 @@ +package com.software416.jsimplebrowser.util; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; + +import com.software416.jsimplebrowser.Browser; + +public class BrowserHelper { + private static final Log LOG = LogFactory.getLog(BrowserHelper.class); + private Browser _browser; + public BrowserHelper(Browser browser) { + _browser = browser; + } + + public String getFirstLinkForTextRegex(String regex) { + List<String> links = getLinksForTextRegex(regex, 1); + return links.size() == 0 ? null : links.get(0); + } + + public List<String> getLinksForTextRegex(String regex) { + return getLinksForTextRegex(regex, Integer.MAX_VALUE); + } + + public List<String> getLinksForTextRegex(String regex, int maxResults) { + return getLinksForTextRegex(regex, maxResults, Browser.MAIN_BROWSER_WINDOW_NAME); + } + public List<String> getLinksForTextRegex(String regex, int maxResults, String windowName) { + List<String> rv = new ArrayList<String>(); + NodeList nl = _browser.getWindow(windowName).getDocument().getElementsByTagName("a"); + Pattern p = Pattern.compile(".*" + regex + ".*", Pattern.CASE_INSENSITIVE); + for (int x = 0; x < nl.getLength() && rv.size() < maxResults; x++) { + Element element = (Element)nl.item(x); + String href = element.getAttribute("href"); + if (LOG.isDebugEnabled()) { + LOG.debug("matching " + regex + " against content " + element.getTextContent() + " with href " + href); + } + if (href != null && element.getTextContent() != null && p.matcher(element.getTextContent()).matches()) { + rv.add(href); + } + } + return rv; + } + + +} Added: 
trunk/core/src/main/java/com/software416/jsimplebrowser/util/HtmlToDomConverter.java =================================================================== --- trunk/core/src/main/java/com/software416/jsimplebrowser/util/HtmlToDomConverter.java (rev 0) +++ trunk/core/src/main/java/com/software416/jsimplebrowser/util/HtmlToDomConverter.java 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,29 @@ +package com.software416.jsimplebrowser.util; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.cyberneko.html.parsers.DOMParser; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +import com.software416.jsimplebrowser.HtmlParseException; + +public class HtmlToDomConverter { + private static final Log LOG = LogFactory.getLog(HtmlToDomConverter.class); + public Document getDocumentFromHtml(byte[] docBytes) throws HtmlParseException { + try { + LOG.debug("Tidying input"); + DOMParser p = new DOMParser(); + p.parse(new InputSource(new ByteArrayInputStream(docBytes))); + return p.getDocument(); + } catch (SAXException ex) { + throw new HtmlParseException(ex); + } catch (IOException ex) { + throw new HtmlParseException(ex); + } + } +} Deleted: trunk/core/src/test/java/com/software416/jsimplebrowser/BrowserTest.java =================================================================== --- trunk/core/src/test/java/com/software416/jsimplebrowser/BrowserTest.java 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/core/src/test/java/com/software416/jsimplebrowser/BrowserTest.java 2007-08-10 15:46:42 UTC (rev 37) @@ -1,91 +0,0 @@ -package com.software416.jsimplebrowser; - -import static org.junit.Assert.*; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; -import 
org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PostMethod; -import org.junit.BeforeClass; -import org.junit.Test; - -public class BrowserTest { - @BeforeClass public static void setUpLog() { - System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog"); - System.setProperty("org.apache.commons.logging.simplelog.showdatetime", "true"); - System.setProperty("org.apache.commons.logging.simplelog.log.com.software416", "debug"); - } - @Test public void testLinkParsing() throws HtmlParseException { - SimpleBrowser b = new SimpleBrowser(); - BrowserHelper bh = new BrowserHelper(b); - b.setResponseBody("<html><body><a href=\"foo.html\">Sign In</a></body></html>"); - assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign In")); - // check case insensitive of link - assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign in")); - // check case insensitive of tag - b.setResponseBody("<html><body><A href=\"foo.html\">Sign In</A></body></html>"); - assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign In")); - assertNull(bh.getFirstLinkForTextRegex("Foo")); - } - - @Test public void testGetMetaRefresh() throws HtmlParseException { - SimpleBrowser b = new SimpleBrowser(); - b.setResponseBody("<html><body><meta http-equiv=\"Refresh\" content=\"0;URL=http://www.linkedin.com/home\"></body></html>"); - assertEquals("http://www.linkedin.com/home", b.findRefreshLink()); - b.setResponseBody("<html><body></body></html>"); - assertNull(b.findRefreshLink()); - b.setResponseBody("<html><body><meta></meta></body></html>"); - assertNull(b.findRefreshLink()); - b.setResponseBody("<html><body><meta http-equiv=\"Foo\"></meta></body></html>"); - assertNull(b.findRefreshLink()); - b.setResponseBody("<html><body><meta http-equiv=\"Refreshment\"></meta></body></html>"); - assertNull(b.findRefreshLink()); - b.setResponseBody("<html><body><meta http-equiv=\"Refreshment\" 
content=\"blah\"></meta></body></html>"); - assertNull(b.findRefreshLink()); - } - - @Test public void testFormMethodGeneration() throws URIException, HtmlParseException { - String baseForm = "<html><body><form name=\"%1$s\" method=\"%2$s\" action=\"%3$s\"></form></body></html>"; - SimpleBrowser b = new SimpleBrowser(); - b.setRequestMethod(new GetMethod("http://www.foo.com")); - - b.setResponseBody(String.format(baseForm, "login", "GET", "http://foo.com/login")); - HttpMethod m = b.buildMethodForForm("login", new HashMap<String, String>()); - assertNotNull(m); - assertEquals(GetMethod.class, m.getClass()); - assertEquals(m.getURI(), new URI("http://foo.com/login", true)); - - b.setResponseBody(String.format(baseForm, "login", "GET", "/login")); - m = b.buildMethodForForm("login", new HashMap<String, String>()); - assertNotNull(m); - assertEquals(GetMethod.class, m.getClass()); - assertEquals(m.getURI(), new URI("http://www.foo.com/login", true)); - - b.setResponseBody(String.format(baseForm, "login", "GET", "/login")); - Map<String,String> params = new HashMap<String, String>(); - params.put("test_key", "test_value"); - params.put("test2", "foo"); - m = b.buildMethodForForm("login", params); - assertNotNull(m); - assertEquals(GetMethod.class, m.getClass()); - assertEquals(new URI("http://www.foo.com/login?test_key=test_value&test2=foo", true), m.getURI()); - - b.setResponseBody(String.format(baseForm, "login", "POST", "https://bar.com/login")); - m = b.buildMethodForForm("login", new HashMap<String, String>()); - assertNotNull(m); - assertEquals(PostMethod.class, m.getClass()); - assertEquals(m.getURI(), new URI("https://bar.com/login", true)); - - b.setResponseBody(String.format(baseForm, "login", "POST", "login")); - m = b.buildMethodForForm("login", new HashMap<String, String>()); - assertNotNull(m); - assertEquals(PostMethod.class, m.getClass()); - assertEquals(m.getURI(), new URI("http://www.foo.com/login", true)); - - 
assertNull(b.buildMethodForForm("noform", new HashMap<String, String>()));
-    }
-}
Added: trunk/core/src/test/java/com/software416/jsimplebrowser/impl/BrowserTest.java
===================================================================
--- trunk/core/src/test/java/com/software416/jsimplebrowser/impl/BrowserTest.java (rev 0)
+++ trunk/core/src/test/java/com/software416/jsimplebrowser/impl/BrowserTest.java 2007-08-10 15:46:42 UTC (rev 37)
@@ -0,0 +1,124 @@
+package com.software416.jsimplebrowser.impl;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+
+import org.apache.commons.collections.MultiHashMap;
+import org.apache.commons.collections.MultiMap;
+import org.apache.commons.httpclient.HttpMethod;
+import org.apache.commons.httpclient.URI;
+import org.apache.commons.httpclient.URIException;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.methods.PostMethod;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.LogFactory;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.software416.jsimplebrowser.BrowserException;
+// Tests for WindowImpl: response handling, meta-refresh lookup and building an HttpMethod from a form.
+public class BrowserTest {
+    @BeforeClass public static void setUpLog() { // selects commons-logging's SimpleLog and sets "debug" for com.software416
+        System.setProperty("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.SimpleLog");
+        System.setProperty("org.apache.commons.logging.simplelog.showdatetime", "true");
+        System.setProperty("org.apache.commons.logging.simplelog.log.com.software416", "debug");
+    }
+    // Feeds badlyFormattedPage.html to a window; there are no assertions, the test passes when handleResponse() does not throw.
+    @Test public void testParsingBadPages() throws BrowserException, IOException {
+        String baseForm = IOUtils.toString(Thread.currentThread().getContextClassLoader().getResourceAsStream("badlyFormattedPage.html"));
+        RequestInfoImpl requestInfo = new RequestInfoImpl();
+        requestInfo.setRequestMethod(new GetMethod("http://www.foo.com"));
+        requestInfo.setResponseBody(baseForm.getBytes());
+        WindowImpl wi = new WindowImpl(null);
+        wi.handleResponse(requestInfo);
+    }
+    // findRefreshLink() must return the URL of the http-equiv="Refresh" meta tag and null for each of the other pages.
+    @Test public void testGetMetaRefresh() throws BrowserException {
+        WindowImpl w = new WindowImpl(null);
+        w.setDoFollowRedirects(false);
+        RequestInfoImpl ri = new RequestInfoImpl();
+        ri.setResponseBody("<html><body><meta http-equiv=\"Refresh\" content=\"0;URL=http://www.linkedin.com/home\"></body></html>".getBytes());
+        w.handleResponse(ri);
+        assertEquals("http://www.linkedin.com/home", w.findRefreshLink());
+        ri.setResponseBody("<html><body></body></html>".getBytes()); // no meta tag at all
+        w.handleResponse(ri);
+        assertNull(w.findRefreshLink());
+        ri.setResponseBody("<html><body><meta></meta></body></html>".getBytes()); // meta tag without attributes
+        w.handleResponse(ri);
+        assertNull(w.findRefreshLink());
+        ri.setResponseBody("<html><body><meta http-equiv=\"Foo\"></meta></body></html>".getBytes()); // unrelated http-equiv
+        w.handleResponse(ri);
+        assertNull(w.findRefreshLink());
+        ri.setResponseBody("<html><body><meta http-equiv=\"Refreshment\"></meta></body></html>".getBytes()); // http-equiv that merely starts with "Refresh"
+        w.handleResponse(ri);
+        assertNull(w.findRefreshLink());
+        ri.setResponseBody("<html><body><meta http-equiv=\"Refreshment\" content=\"blah\"></meta></body></html>".getBytes()); // same, with a content attribute
+        w.handleResponse(ri);
+        assertNull(w.findRefreshLink());
+    }
+    @Test public void testDefaultFormValuesMethodGeneration() throws BrowserException, IOException { // values declared in formTest1.html must show up in the GET query string
+        String baseForm = IOUtils.toString(Thread.currentThread().getContextClassLoader().getResourceAsStream("formTest1.html"));
+        RequestInfoImpl requestInfo = new RequestInfoImpl();
+        requestInfo.setRequestMethod(new GetMethod("http://www.foo.com"));
+        requestInfo.setResponseBody(baseForm.getBytes());
+        WindowImpl wi = new WindowImpl(null);
+        wi.handleResponse(requestInfo);
+        HttpMethod m = wi.buildMethodForForm("login", new MultiHashMap());
+        assertNotNull(m);
+        assertEquals(GetMethod.class, m.getClass());
+        assertEquals(m.getURI().getPath(), "/foo.action"); // NOTE(review): arguments are (actual, expected) here and in the getURI() checks below; JUnit expects (expected, actual)
+        LogFactory.getLog(getClass()).debug(m.getURI().getQuery());
+        assertTrue(m.getURI().getQuery().contains("foo=baz"));
+        assertTrue(m.getURI().getQuery().contains("password=secret"));
+        assertTrue(m.getURI().getQuery().contains("checkedBox=yep"));
+        assertTrue(m.getURI().getQuery().contains("radioB=yep"));
+        assertTrue(m.getURI().getQuery().contains("selectInput=IsSelected"));
+        assertTrue(m.getURI().getQuery().contains("selectInput=youGotIt"));
+        assertEquals(6, m.getURI().getQuery().split("&").length); // only the six pairs checked above may be present
+    }
+    @Test public void testFormMethodGeneration() throws BrowserException, URIException { // method class and target URI for GET/POST forms with absolute and relative actions
+        String baseForm = "<html><body><form name=\"%1$s\" method=\"%2$s\" action=\"%3$s\"></form></body></html>";
+        RequestInfoImpl requestInfo = new RequestInfoImpl();
+        requestInfo.setRequestMethod(new GetMethod("http://www.foo.com"));
+        requestInfo.setResponseBody(String.format(baseForm, "login", "GET", "http://foo.com/login").getBytes());
+        WindowImpl wi = new WindowImpl(null);
+        wi.handleResponse(requestInfo);
+        HttpMethod m = wi.buildMethodForForm("login", new MultiHashMap());
+        assertNotNull(m);
+        assertEquals(GetMethod.class, m.getClass());
+        assertEquals(m.getURI(), new URI("http://foo.com/login", true));
+        // GET form whose action "/login" is expected to resolve against the request URI http://www.foo.com
+        requestInfo.setResponseBody(String.format(baseForm, "login", "GET", "/login").getBytes());
+        wi.handleResponse(requestInfo);
+        m = wi.buildMethodForForm("login", new MultiHashMap());
+        assertNotNull(m);
+        assertEquals(GetMethod.class, m.getClass());
+        assertEquals(m.getURI(), new URI("http://www.foo.com/login", true));
+        // caller-supplied parameters end up in the query string (handleResponse() is not called again; the body equals the previous one)
+        requestInfo.setResponseBody(String.format(baseForm, "login", "GET", "/login").getBytes());
+        MultiMap params = new MultiHashMap();
+        params.put("test_key", "test_value");
+        params.put("test2", "foo");
+        m = wi.buildMethodForForm("login", params);
+        assertNotNull(m);
+        assertEquals(GetMethod.class, m.getClass());
+        assertEquals(new URI("http://www.foo.com/login?test_key=test_value&test2=foo", true), m.getURI());
+        // POST form with an absolute https action
+        requestInfo.setResponseBody(String.format(baseForm, "login", "POST", "https://bar.com/login").getBytes());
+        wi.handleResponse(requestInfo);
+        m = wi.buildMethodForForm("login", new MultiHashMap());
+        assertNotNull(m);
+        assertEquals(PostMethod.class, m.getClass());
+        assertEquals(m.getURI(), new URI("https://bar.com/login", true));
+        // POST form whose action "login" has no leading slash
+        requestInfo.setResponseBody(String.format(baseForm, "login", "POST", "login").getBytes());
+        wi.handleResponse(requestInfo);
+        m = wi.buildMethodForForm("login", new MultiHashMap());
+        assertNotNull(m);
+        assertEquals(PostMethod.class, m.getClass());
+        assertEquals(m.getURI(), new URI("http://www.foo.com/login", true));
+        // a form name that does not occur in the page yields null
+        assertNull(wi.buildMethodForForm("noform", new MultiHashMap()));
+    }
+}
Added: trunk/core/src/test/java/com/software416/jsimplebrowser/util/BrowserHelperTest.java
===================================================================
--- trunk/core/src/test/java/com/software416/jsimplebrowser/util/BrowserHelperTest.java (rev 0)
+++ trunk/core/src/test/java/com/software416/jsimplebrowser/util/BrowserHelperTest.java 2007-08-10 15:46:42 UTC (rev 37)
@@ -0,0 +1,39 @@
+package com.software416.jsimplebrowser.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.IOException;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.Test;
+
+import com.software416.jsimplebrowser.Browser;
+import com.software416.jsimplebrowser.BrowserException;
+import com.software416.jsimplebrowser.impl.BrowserImpl;
+import com.software416.jsimplebrowser.impl.RequestInfoImpl;
+import com.software416.jsimplebrowser.impl.WindowImpl;
+// Checks BrowserHelper's link lookup against two inline pages and the linkTest1.html fixture.
+public class BrowserHelperTest {
+    @Test public void testLinkParsing() throws BrowserException, IOException {
+        BrowserImpl b = new BrowserImpl();
+        BrowserHelper bh = new BrowserHelper(b);
+        WindowImpl wi = (WindowImpl)b.getWindow(Browser.MAIN_BROWSER_WINDOW_NAME); // pages are loaded into the window the helper searches by default
+        RequestInfoImpl ri = new RequestInfoImpl();
+        ri.setResponseBody("<html><body><a href=\"foo.html\">Sign In</a></body></html>".getBytes());
+        wi.handleResponse(ri);
+        assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign In"));
+        // check case insensitive of link
+        assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign in"));
+        // check case insensitive of tag
+        ri.setResponseBody("<html><body><A href=\"foo.html\">Sign In</A></body></html>".getBytes());
+        wi.handleResponse(ri);
+        assertEquals("foo.html", bh.getFirstLinkForTextRegex("Sign\\s+In"));
+        assertNull(bh.getFirstLinkForTextRegex("Foo")); // no link text on the page contains "Foo"
+        ri.setResponseBody(IOUtils.toString(Thread.currentThread().getContextClassLoader().getResourceAsStream("linkTest1.html")).getBytes());
+        wi.handleResponse(ri);
+        assertEquals("https://www.linkedin.com/secure/login?trk=ghdr_signin", bh.getFirstLinkForTextRegex("Sign\\s+In")); // the fixture's link text is "Sign in"
+    }
+
+
+}
Added: trunk/core/src/test/resources/badlyFormattedPage.html
===================================================================
--- trunk/core/src/test/resources/badlyFormattedPage.html (rev 0)
+++ trunk/core/src/test/resources/badlyFormattedPage.html 2007-08-10 15:46:42 UTC (rev 37)
@@ -0,0 +1,66 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en-US">
+<head name="connections_browser">
+    <title>LinkedIn: My Contacts: Connections</title>
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+
+    <link rel="shortcut icon" type="image/ico" href="/favicon.ico">
+    <link rel="stylesheet" type="text/css" href="/css/style.css?v=build-399_3_1424">
+    <link rel="stylesheet" type="text/css" href="/css/cobrand/no_cobrand.css?v=build-399_3_1424">
+    <script type="text/javascript" src="/js/scripts.js?v=build-399_3_1424"></script>
+    <script type="text/javascript" src="/js/searchbar.js?v=build-399_3_1424"></script>
+
+
+    <link rel="stylesheet" type="text/css" href="/css/connection-browser.css?v=build-399_3_1424">
+
+    <script type="text/javascript" src="/js/dwr/util.js?v=build-399_3_1424"></script>
+    <script type="text/javascript" src="/js/dwr/engine.js?v=build-399_3_1424"></script>
+    <script type="text/javascript" src="/js/dwr/engine_fix.js?v=build-399_3_1424"></script>
+
<script type="text/javascript" src="/js/yui/yahoo-dom-event.js?v=build-399_3_1424"></script> + <script type="text/javascript" src="/js/lui/log4javascript.js?v=build-399_3_1424"></script> + <script type="text/javascript" src="/js/lui/linkedin.js?v=build-399_3_1424"></script> + <script type="text/javascript"> + DWREngine.setErrorHandler(function(message, ex) { + alert('We are sorry. It looks like there is a problem with your request.'); + }); + DWREngine.setTextHtmlHandler(function(message, ex) { + alert('We are sorry. It looks like there is a problem with your request.'); + }); + </script> + <script type="text/javascript" src="/js/lui/linkedin_goback.js?v=build-399_3_1424"></script> + <script type="text/javascript" src="/js/ajax/connections_browser_service.js?v=build-399_3_1424"></script> + <script type="text/javascript" src="/dwr/interface/ConnectionsBrowserService.js"></script> + + <script type="text/javascript" src="/js/lui/linkedin_json.js?v=build-399_3_1424"></script> + <script type="text/javascript" src="/js/lui/linkedin_ui.js?v=build-399_3_1424"></script> +</head> + +<script type="text/javascript"> +connectionsBrowser.setThreshold(500); +connectionsBrowser.setErrorMessage("There was a problem loading your connections. Try to refresh the page."); +connectionsBrowser.setMessageNoCon('<a href="/findContacts?displayFindContact=&membersOnly=membersOnly&context=2&sortAction=lastname&trk=cnx_noconx" >Start building your network.</a> Discover which friends and colleagues are already LinkedIn.'); +connectionsBrowser.setMessageFilterNoCon("None of your connections meet the filtering criteria specified above. 
Reset the filters to see all of your connections."); +</script> + +<body class="my-contacts"> +<div id="main"> + <noscript> + <h1> + <span>My Contacts:</span> Connections + <div class="hdrlink"> + + + <p class="dc88x31"> + <script type="text/javascript"> + var dbl_page = 'connections_browser'; + var dbl_tile = '5'; + var dbl_sz = '88x31'; + </script> + <script type="text/javascript" src="/js/doubleclick.js?v=build-399_3_1424"></script> + </p> +</div> + </h1> + <p>You currently have JavaScript disabled or are using a browser that doesn't support it. Either enable JavaScript and refresh this page or proceed to the <a href="/connectionsnojs?trk=cnx_nojslink" >basic connection browser</a>.</p> + </noscript> +</body> +</html> \ No newline at end of file Added: trunk/core/src/test/resources/formTest1.html =================================================================== --- trunk/core/src/test/resources/formTest1.html (rev 0) +++ trunk/core/src/test/resources/formTest1.html 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,20 @@ +<html> + <body> + <form name="login" method="GET" action="/foo.action"> + <input type="text" name="foo" value="baz" /> + <input type="invalid" name="foo1" value="baz" /> + <input type="password" name="password" value="secret" /> + <input type="checkbox" name="uncheckedBox" value="secret" /> + <input type="checkbox" checked name="checkedBox" value="yep" /> + <input type="radio" name="radioB" value="secret" /> + <input type="radio" checked name="radioB" value="yep" /> + <input type="button" name="Foo" value="Bad Button" /> + <select name="selectInput"> + <option></option> + <option value="false">False</option> + <option selected>IsSelected</option> + <option selected="true" value="youGotIt">Use Value</option> + </select> + </form> + </body> +</html> \ No newline at end of file Added: trunk/core/src/test/resources/linkTest1.html =================================================================== --- trunk/core/src/test/resources/linkTest1.html (rev 0) 
+++ trunk/core/src/test/resources/linkTest1.html 2007-08-10 15:46:42 UTC (rev 37) @@ -0,0 +1,11 @@ +<html><body><div id="hdr"> + <h1><a href="/home?trk=ghdr_logo" >LinkedIn</a></h1> + <ul> + <li><a href="/static?key=what_is_linkedin&trk=ghdr_whatis" ><strong>What is LinkedIn?</strong></a></li> + <li><a href="https://www.linkedin.com/secure/register?trk=ghdr_join" ><strong>Join now</strong></a></li> + + <li class="signin">Already a user? <a href="https://www.linkedin.com/secure/login?trk=ghdr_signin" >Sign in</a></li> + </ul> +</div> +</body> +</html> \ No newline at end of file Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2007-08-07 21:56:12 UTC (rev 36) +++ trunk/pom.xml 2007-08-10 15:46:42 UTC (rev 37) @@ -5,6 +5,18 @@ <name>JSimpleBrowser</name> <version>1.0.0-SNAPSHOT</version> <packaging>pom</packaging> + <distributionManagement> + <snapshotRepository> + <id>snapshots</id> + <uniqueVersion>true</uniqueVersion> + <url>http://invincible.dynalias.com:8081/artifactory/libs-snapshots</url> + </snapshotRepository> + <repository> + <id>central</id> + <uniqueVersion>false</uniqueVersion> + <url>http://invincible.dynalias.com:8081/artifactory/libs-snapshots</url> + </repository> + </distributionManagement> <url>http://jsimplebrowser.sourceforge.net/</url> <modules> <module>core</module> @@ -78,15 +90,25 @@ <scope>provided</scope> </dependency> <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + <version>2.3</version> + </dependency> + <dependency> <groupId>commons-collections</groupId> <artifactId>commons-collections</artifactId> <version>3.1</version> </dependency> <dependency> - <groupId>org.hibernate</groupId> + <groupId>org.cyberneko</groupId> + <artifactId>html-core</artifactId> + <version>1.0.0-SNAPSHOT</version> + </dependency> + <!--<dependency> + <groupId>jtidy</groupId> <artifactId>jtidy</artifactId> - <version>r8-21122004</version> - </dependency> + 
<version>8.0-SNAPSHOT</version> + </dependency>--> <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |