Revision: 3410
http://archive-access.svn.sourceforge.net/archive-access/?rev=3410&view=rev
Author: bradtofel
Date: 2011-02-06 14:54:55 +0000 (Sun, 06 Feb 2011)
Log Message:
-----------
initial rev, attempting end-to-end testing of HTML rewrite
Added Paths:
-----------
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java
Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java
===================================================================
--- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java (rev 0)
+++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java 2011-02-06 14:54:55 UTC (rev 3410)
@@ -0,0 +1,91 @@
+package org.archive.wayback.archivalurl;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import org.archive.wayback.ResultURIConverter;
+import org.archive.wayback.replay.html.ReplayParseContext;
+import org.archive.wayback.util.htmllex.ContextAwareLexer;
+import org.htmlparser.Node;
+import org.htmlparser.lexer.Lexer;
+import org.htmlparser.lexer.Page;
+import org.htmlparser.util.ParserException;
+
+import junit.framework.TestCase;
+
+public class FastArchivalUrlReplayParseEventHandlerTest extends TestCase {
+
+
+
+
+ public void testRewrite() throws Exception {
+ assertEquals("<html><a href=\"http://replay.archive.org/2001/http://www.example.com/foo.html\">foo</a></html>",doEndToEnd("<html><a href=\"/foo.html\">foo</a></html>"));
+ assertEquals("<html><a href=\"http://replay.archive.org/2001/http://www.example.com/foo.html\">foo</a></html>",doEndToEnd("<html><a href=\"foo.html\">foo</a></html>"));
+ assertEquals("<html><a href=\"javascript:doWin('http://replay.archive.org/2001/http://www.symphony.org/')\">American Symphony Orchestra League</a></html>",doEndToEnd("<html><a href=\"javascript:doWin('http://www.symphony.org')\">American Symphony Orchestra League</a></html>"));
+ }
+
+ public String doEndToEnd(String input) throws Exception {
+ String baseUrl = "http://www.example.com/";
+ String timestamp = "2001";
+ String outputCharset = "UTF-8";
+ String charSet = "UTF-8";
+
+ ByteArrayInputStream bais = new ByteArrayInputStream(input.getBytes(charSet));
+
+ FastArchivalUrlReplayParseEventHandler delegator = new FastArchivalUrlReplayParseEventHandler();
+ delegator.setCommentJsp(null);
+ delegator.setJspInsertPath(null);
+
+ ArchivalUrlResultURIConverter uriConverter = new ArchivalUrlResultURIConverter();
+ uriConverter.setReplayURIPrefix("http://replay.archive.org/");
+
+ ArchivalUrlContextResultURIConverterFactory fact =
+ new ArchivalUrlContextResultURIConverterFactory(
+ (ArchivalUrlResultURIConverter) uriConverter);
+
+ // The URL of the page, for resolving in-page relative URLs:
+ URL url = null;
+ try {
+ url = new URL(baseUrl);
+ } catch (MalformedURLException e1) {
+ // TODO: this shouldn't happen...
+ e1.printStackTrace();
+ throw new IOException(e1.getMessage());
+ }
+
+ // To make sure we get the length, we have to buffer it all up...
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ // set up the context:
+ ReplayParseContext context =
+ new ReplayParseContext(fact,url,timestamp);
+ context.setOutputCharset(outputCharset);
+ context.setOutputStream(baos);
+ context.setJspExec(null);
+
+ // and finally, parse, using the special lexer that knows how to
+ // handle javascript blocks containing unescaped HTML entities:
+ Page lexPage = new Page(bais,charSet);
+ Lexer lexer = new Lexer(lexPage);
+ Lexer.STRICT_REMARKS = false;
+ ContextAwareLexer lex = new ContextAwareLexer(lexer, context);
+ Node node;
+ try {
+ while((node = lex.nextNode()) != null) {
+ delegator.handleNode(context, node);
+ }
+ delegator.handleParseComplete(context);
+ } catch (ParserException e) {
+ e.printStackTrace();
+ throw new IOException(e.getMessage());
+ }
+
+ // At this point, baos contains the utf-8 encoded bytes of our result:
+ return new String(baos.toByteArray(),outputCharset);
+
+ }
+}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|