Revision: 3410 http://archive-access.svn.sourceforge.net/archive-access/?rev=3410&view=rev Author: bradtofel Date: 2011-02-06 14:54:55 +0000 (Sun, 06 Feb 2011) Log Message: ----------- initial rev, attempting end-to-end testing of HTML rewrite Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/FastArchivalUrlReplayParseEventHandlerTest.java 2011-02-06 14:54:55 UTC (rev 3410) @@ -0,0 +1,91 @@ +package org.archive.wayback.archivalurl; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URL; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.util.htmllex.ContextAwareLexer; +import org.htmlparser.Node; +import org.htmlparser.lexer.Lexer; +import org.htmlparser.lexer.Page; +import org.htmlparser.util.ParserException; + +import junit.framework.TestCase; + +public class FastArchivalUrlReplayParseEventHandlerTest extends TestCase { + + + + + public void testRewrite() throws Exception { + assertEquals("<html><a href=\"http://replay.archive.org/2001/http://www.example.com/foo.html\">foo</a></html>",doEndToEnd("<html><a href=\"/foo.html\">foo</a></html>")); + assertEquals("<html><a href=\"http://replay.archive.org/2001/http://www.example.com/foo.html\">foo</a></html>",doEndToEnd("<html><a href=\"foo.html\">foo</a></html>")); + assertEquals("<html><a href=\"javascript:doWin('http://replay.archive.org/2001/http://www.symphony.org/')\">American Symphony Orchestra League</a></html>",doEndToEnd("<html><a href=\"javascript:doWin('http://www.symphony.org')\">American Symphony Orchestra League</a></html>")); + } + + public String doEndToEnd(String input) throws Exception { + String baseUrl = "http://www.example.com/"; + String timestamp = "2001"; + String outputCharset = "UTF-8"; + String charSet = "UTF-8"; + + ByteArrayInputStream bais = new ByteArrayInputStream(input.getBytes(charSet)); + + FastArchivalUrlReplayParseEventHandler delegator = new FastArchivalUrlReplayParseEventHandler(); + delegator.setCommentJsp(null); + delegator.setJspInsertPath(null); + + ArchivalUrlResultURIConverter uriConverter = new ArchivalUrlResultURIConverter(); + uriConverter.setReplayURIPrefix("http://replay.archive.org/"); + + ArchivalUrlContextResultURIConverterFactory fact = + new ArchivalUrlContextResultURIConverterFactory( + (ArchivalUrlResultURIConverter) uriConverter); + + // The URL of the page, for resolving in-page relative URLs: + URL url = null; + try { + url = new URL(baseUrl); + } catch (MalformedURLException e1) { + // TODO: this shouldn't happen... + e1.printStackTrace(); + throw new IOException(e1.getMessage()); + } + + // To make sure we get the length, we have to buffer it all up... + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + // set up the context: + ReplayParseContext context = + new ReplayParseContext(fact,url,timestamp); + context.setOutputCharset(outputCharset); + context.setOutputStream(baos); + context.setJspExec(null); + + // and finally, parse, using the special lexer that knows how to + // handle javascript blocks containing unescaped HTML entities: + Page lexPage = new Page(bais,charSet); + Lexer lexer = new Lexer(lexPage); + Lexer.STRICT_REMARKS = false; + ContextAwareLexer lex = new ContextAwareLexer(lexer, context); + Node node; + try { + while((node = lex.nextNode()) != null) { + delegator.handleNode(context, node); + } + delegator.handleParseComplete(context); + } catch (ParserException e) { + e.printStackTrace(); + throw new IOException(e.getMessage()); + } + + // At this point, baos contains the utf-8 encoded bytes of our result: + return new String(baos.toByteArray(),outputCharset); + + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |