From: <bra...@us...> - 2009-11-05 23:06:45
|
Revision: 2880 http://archive-access.svn.sourceforge.net/archive-access/?rev=2880&view=rev Author: bradtofel Date: 2009-11-05 23:06:38 +0000 (Thu, 05 Nov 2009) Log Message: ----------- INITIAL REV: htmllex code for streaming modification of HTML documents Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ContextResultURIConverterFactory.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegatorVisitor.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/StringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AfterBodyStartTagJSPExecRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AttributeModifyingRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/BeforeBodyEndTagJSPExecRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/CommentRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSContentRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSPExecRule.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/RawNodeRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StaticStringRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StyleContentRule.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseCSSStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseHrefStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BlockCSSStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/IdentityStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/InlineCSSStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/JSStringTransformer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/URLStringTransformer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ContextResultURIConverterFactory.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ContextResultURIConverterFactory.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ContextResultURIConverterFactory.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,40 @@ 
+/* ContextResultURIConverterFactory + * + * $Id$: + * + * Created on Nov 5, 2009. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.replay.html; + +import org.archive.wayback.ResultURIConverter; + +/** + * + * Abstracts creation of specialized ResultURIConverters based on particular + * flags. 
+ * + * @author brad + * + */ +public interface ContextResultURIConverterFactory { + public ResultURIConverter getContextConverter(String flags); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ContextResultURIConverterFactory.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,146 @@ +/* ReplayParseContext + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html; + +import java.io.OutputStream; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.replay.JSPExecutor; +import org.archive.wayback.util.htmllex.ParseContext; + +public class ReplayParseContext extends ParseContext { + private ContextResultURIConverterFactory uriConverterFactory = null; + private String datespec = null; + private JSPExecutor jspExec = null; + private OutputStream outputStream = null; + private Map<String,ResultURIConverter> converters = null; + private String outputCharset; + private int phase = -1; + + public ReplayParseContext(ContextResultURIConverterFactory uriConverterFactory, + URL baseUrl, String datespec) { + + this.uriConverterFactory = uriConverterFactory; + this.baseUrl = baseUrl; + this.datespec = datespec; + converters = new HashMap<String,ResultURIConverter>(); + } + + public void setPhase(int phase) { + this.phase = phase; + } + public int getPhase() { + return phase; + } + + /** + * @return the converters + */ + public Map<String, ResultURIConverter> getConverters() { + return converters; + } + + /** + * @param converters the converters to set + */ + public void setConverters(Map<String, ResultURIConverter> converters) { + this.converters = converters; + } + public void addConverter(String flag, ResultURIConverter converter) { + converters.put(flag, converter); + } + + + private ResultURIConverter makeConverter(String flags) { + return uriConverterFactory.getContextConverter(flags); + } + public ResultURIConverter getConverter(String flags) { + ResultURIConverter converter = converters.get(flags); + if(converter == null) { + converter = makeConverter(flags); + 
converters.put(flags,converter); + } + return converter; + } + + public String contextualizeUrl(String url) { + return contextualizeUrl(url,""); + } + public String contextualizeUrl(String url, String flags) { + if(url.startsWith("javascript:")) { + return url; + } + url = super.contextualizeUrl(url); + if(flags == null) { + flags = ""; + } + ResultURIConverter converter = getConverter(flags); + return converter.makeReplayURI(datespec, url); + } + + + /** + * @return the charset + */ + public String getOutputCharset() { + return outputCharset; + } + + /** + * @param outputCharset the outputCharset to set + */ + public void setOutputCharset(String outputCharset) { + this.outputCharset = outputCharset; + } + + /** + * @return the outputStream + */ + public OutputStream getOutputStream() { + return outputStream; + } + + /** + * @param outputStream the outputStream to set + */ + public void setOutputStream(OutputStream outputStream) { + this.outputStream = outputStream; + } + /** + * @return the jspExec + */ + public JSPExecutor getJspExec() { + return jspExec; + } + /** + * @param jspExec the jspExec to set + */ + public void setJspExec(JSPExecutor jspExec) { + this.jspExec = jspExec; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseContext.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,151 @@ +/* 
ReplayParseEventDelegator + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.List; + +import org.archive.wayback.util.htmllex.ParseEventHandler; +import org.archive.wayback.util.htmllex.ParseEventDelegator; +import org.archive.wayback.util.htmllex.ParseContext; +import org.htmlparser.Node; + +public class ReplayParseEventDelegator implements ParseEventHandler { + + public static final int PHASE_PRE_MODIFY = 0; + public static final int PHASE_MODIFY = 1; + public static final int PHASE_POST_OUTPUT = 2; + + private ParseEventDelegator preModifyDelegator = null; + private ParseEventDelegator modifyDelegator = null; + private ParseEventDelegator postModifyDelegator = null; + private List<ReplayParseEventDelegatorVisitor> parserVisitors = null; + + protected void emit(ParseContext context, Node node) throws IOException { + ReplayParseContext rContext = (ReplayParseContext) context; + OutputStream out = rContext.getOutputStream(); + // nothing is written unless an output stream has been configured: + if(out != null) { + String charset = 
rContext.getOutputCharset(); + String rawHTML = node.toHtml(true); + byte[] bytes = null; + try { + bytes = rawHTML.getBytes(charset); + } catch (UnsupportedEncodingException e) { + bytes = rawHTML.getBytes(); + } + out.write(bytes); + } + } + + + public void init() { + preModifyDelegator = new ParseEventDelegator(); + modifyDelegator = new ParseEventDelegator(); + postModifyDelegator = new ParseEventDelegator(); + if(parserVisitors != null) { + for(ReplayParseEventDelegatorVisitor visitor : parserVisitors) { + visitor.visit(this); + } + } + } + + + public void handleNode(ParseContext pContext, Node node) + throws IOException { + ReplayParseContext context = (ReplayParseContext) pContext; + context.setPhase(PHASE_PRE_MODIFY); + preModifyDelegator.handleNode(context,node); + context.setPhase(PHASE_MODIFY); + modifyDelegator.handleNode(context,node); + emit(context, node); + context.setPhase(PHASE_POST_OUTPUT); + postModifyDelegator.handleNode(context,node); + + + } + + public void handleParseComplete(ParseContext context) throws IOException { + preModifyDelegator.handleParseComplete(context); + modifyDelegator.handleParseComplete(context); + postModifyDelegator.handleParseComplete(context); + } + /** + * @return the preModifyDelegator + */ + public ParseEventDelegator getPreModifyDelegator() { + return preModifyDelegator; + } + + + /** + * @param preModifyDelegator the preModifyDelegator to set + */ + public void setPreModifyDelegator(ParseEventDelegator preModifyDelegator) { + this.preModifyDelegator = preModifyDelegator; + } + + + /** + * @return the modifyDelegator + */ + public ParseEventDelegator getModifyDelegator() { + return modifyDelegator; + } + + + /** + * @param modifyDelegator the modifyDelegator to set + */ + public void setModifyDelegator(ParseEventDelegator modifyDelegator) { + this.modifyDelegator = modifyDelegator; + } + + + /** + * @return the postModifyDelegator + */ + public ParseEventDelegator getPostModifyDelegator() { + return 
postModifyDelegator; + } + + + /** + * @param postModifyDelegator the postModifyDelegator to set + */ + public void setPostModifyDelegator(ParseEventDelegator postModifyDelegator) { + this.postModifyDelegator = postModifyDelegator; + } + + + /** + * @param parserVisitors the parserVisitors to set + */ + public void setParserVisitors(List<ReplayParseEventDelegatorVisitor> parserVisitors) { + this.parserVisitors = parserVisitors; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegator.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegatorVisitor.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegatorVisitor.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegatorVisitor.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,29 @@ +/* ReplayParseEventDelegatorVisitor + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html; + +public interface ReplayParseEventDelegatorVisitor { + public void visit(ReplayParseEventDelegator rules); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/ReplayParseEventDelegatorVisitor.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/StringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/StringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/StringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,29 @@ +/* StringTransformer + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html; + +public interface StringTransformer { + public String transform(ReplayParseContext context, String input); +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/StringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AfterBodyStartTagJSPExecRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AfterBodyStartTagJSPExecRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AfterBodyStartTagJSPExecRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,114 @@ +/* AfterBodyStartTagJSPExecRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.OpenTagHandler; +import org.htmlparser.Node; +import org.htmlparser.nodes.TagNode; + +/** + * This Rule fires just after the BODY start tag, emitting the result of the + * replay .jsp into the resulting page at that point. + * + * Sounds simple, BUT, it's possible there is no BODY start tag... + * + * In case this happens, we watch *ALL* tags go by, before they've been output, + * and if we see any start tags not of the following types: + * + * html,head,base,link,meta,title,style,script,body + * + * (tag names starting with "!" are skipped as well) + * + * we emit our content then and there. + * + * We also ensure we don't emit twice by storing a flag in the ParseContext once + * we do emit. 
+ * + * @author brad + * + */ +public class AfterBodyStartTagJSPExecRule extends JSPExecRule +implements ReplayParseEventDelegatorVisitor, OpenTagHandler { + private final String[] okHeadTags = { + "HTML","HEAD","BASE","LINK","META","TITLE","STYLE","SCRIPT","BODY" + }; + private final static String FERRET_DONE_KEY = + AfterBodyStartTagJSPExecRule.class.toString(); + public void visit(ReplayParseEventDelegator rules) { + + rules.getPostModifyDelegator().addOpenTagHandler(this,"BODY"); + rules.getPreModifyDelegator().addOpenTagHandler(this); + } + + public void emit(ReplayParseContext context, Node node) throws IOException { + String found = context.getData(FERRET_DONE_KEY); + if(found == null) { + context.putData(FERRET_DONE_KEY,"1"); + try { + super.emit(context, node); + } catch (ServletException e) { + throw new IOException(e); + } + } + } + + private boolean isNotTagAppearingInHead(TagNode node) { + String thisTag = node.getTagName(); + if(thisTag.startsWith("!")) return false; + for(String tag : okHeadTags) { + if(thisTag.equals(tag)) { + return false; + } + } + return true; + } + + public void handleOpenTagNode(ParseContext pContext, TagNode node) + throws IOException { + ReplayParseContext context = (ReplayParseContext) pContext; + if(context.getData(FERRET_DONE_KEY) == null) { + // we haven't emitted yet: + // are we running in post-emit? + if(context.getPhase() == ReplayParseEventDelegator.PHASE_POST_OUTPUT) { + // emit if it is a body tag: + if(node.getTagName().equals("BODY")) { + emit((ReplayParseContext) context,node); + } + } else { + // must be PHASE_PRE_MODIFY: check whether this tag implies the body has begun: + if(isNotTagAppearingInHead(node)) { + // and this is a tag that shouldn't be in the HEAD. 
Emit: + emit((ReplayParseContext) context,node); + } + } + } + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AfterBodyStartTagJSPExecRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AttributeModifyingRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AttributeModifyingRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AttributeModifyingRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,154 @@ +/* AttributeModifyingRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.OpenTagHandler; +import org.htmlparser.nodes.TagNode; + +public class AttributeModifyingRule implements ReplayParseEventDelegatorVisitor, + OpenTagHandler { + + private String tagName = null; + private String whereAttributeName = null; + private String whereAttributeValue = null; + private String modifyAttributeName = null; + private StringTransformer transformer; + + public void visit(ReplayParseEventDelegator rules) { + if(modifyAttributeName == null) { + throw new RuntimeException("Need modifyAttributeName"); + } + if(tagName == null) { + rules.getModifyDelegator().addOpenTagHandler(this); + } else { + rules.getModifyDelegator().addOpenTagHandler(this, tagName); + } + } + + public void handleOpenTagNode(ParseContext context, TagNode node) + throws IOException { + if(whereAttributeName != null) { + // if whereAttributeName is set, make sure it is present: + String nodeAttrVal = node.getAttribute(whereAttributeName); + if(nodeAttrVal == null) { + return; + } + // if the value is specified, too, make sure that matches, as well: + if(whereAttributeValue != null) { + if(!nodeAttrVal.equals(whereAttributeValue)) { + return; + } + } + } + // try to perform the update: + if(modifyAttributeName == null) { + // mis-configuration... 
this is required: + // TODO: log a warning + return; + } + String nodeVal = node.getAttribute(modifyAttributeName); + if(nodeVal != null) { + String newVal = transformer.transform((ReplayParseContext)context, nodeVal); + node.setAttribute(modifyAttributeName, newVal); + } + } + + /** + * @return the tagName + */ + public String getTagName() { + return tagName; + } + + /** + * @param tagName the tagName to set + */ + public void setTagName(String tagName) { + this.tagName = tagName.toUpperCase(); + } + + /** + * @return the whereAttributeName + */ + public String getWhereAttributeName() { + return whereAttributeName; + } + + /** + * @param whereAttributeName the whereAttributeName to set + */ + public void setWhereAttributeName(String whereAttributeName) { + this.whereAttributeName = whereAttributeName.toUpperCase(); + } + + /** + * @return the whereAttributeValue + */ + public String getWhereAttributeValue() { + return whereAttributeValue; + } + + /** + * @param whereAttributeValue the whereAttributeValue to set + */ + public void setWhereAttributeValue(String whereAttributeValue) { + this.whereAttributeValue = whereAttributeValue; + } + + /** + * @return the modifyAttributeName + */ + public String getModifyAttributeName() { + return modifyAttributeName; + } + + /** + * @param modifyAttributeName the modifyAttributeName to set + */ + public void setModifyAttributeName(String modifyAttributeName) { + this.modifyAttributeName = modifyAttributeName.toUpperCase(); + } + + /** + * @return the transformer + */ + public StringTransformer getTransformer() { + return transformer; + } + + /** + * @param transformer the transformer to set + */ + public void setTransformer(StringTransformer transformer) { + this.transformer = transformer; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/AttributeModifyingRule.java ___________________________________________________________________ Added: svn:keywords + 
Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/BeforeBodyEndTagJSPExecRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/BeforeBodyEndTagJSPExecRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/BeforeBodyEndTagJSPExecRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,75 @@ +/* BeforeBodyEndTagJSPExecRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.CloseTagHandler; +import org.archive.wayback.util.htmllex.handlers.ParseCompleteHandler; +import org.htmlparser.Node; +import org.htmlparser.nodes.TagNode; + +public class BeforeBodyEndTagJSPExecRule extends JSPExecRule +implements ReplayParseEventDelegatorVisitor, CloseTagHandler, ParseCompleteHandler { + private final static String FERRET_DONE_KEY = + BeforeBodyEndTagJSPExecRule.class.toString(); + + public void visit(ReplayParseEventDelegator rules) { + rules.getPreModifyDelegator().addCloseTagHandler(this); + rules.getPreModifyDelegator().addParseCompleteHandler(this); + } + + public void emit(ReplayParseContext context, Node node) throws IOException { + String found = context.getData(FERRET_DONE_KEY); + if(found == null) { + context.putData(FERRET_DONE_KEY,"1"); + try { + super.emit(context, node); + } catch (ServletException e) { + throw new IOException(e); + } + } + } + + + public void handleCloseTagNode(ParseContext context, TagNode node) + throws IOException { + String tagName = node.getTagName(); + if(tagName.equals("BODY") || tagName.equals("HTML")) { + emit((ReplayParseContext) context,node); + } + } + + public void handleParseComplete(ParseContext context) throws IOException { + emit((ReplayParseContext) context,null); + } + +} Property changes on: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/BeforeBodyEndTagJSPExecRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/CommentRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/CommentRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/CommentRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,67 @@ +/* CommentRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; +import java.io.OutputStream; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.CloseTagHandler; +import org.archive.wayback.util.htmllex.handlers.OpenTagHandler; +import org.htmlparser.Node; +import org.htmlparser.nodes.TagNode; + +public class CommentRule implements ReplayParseEventDelegatorVisitor, + OpenTagHandler, CloseTagHandler { + + private final static byte[] startComment = "<!--".getBytes(); + private final static byte[] endComment = "-->".getBytes(); + + public void emit(ReplayParseContext context, Node node) throws IOException { + OutputStream os = context.getOutputStream(); + if(os != null) { + os.write(startComment); + os.write(node.toHtml(true).getBytes()); + os.write(endComment); + } + } + + public void visit(ReplayParseEventDelegator rules) { + rules.getPreModifyDelegator().addOpenTagHandler(this); + rules.getPreModifyDelegator().addCloseTagHandler(this, "A"); + } + + public void handleOpenTagNode(ParseContext context, TagNode node) throws IOException { + emit((ReplayParseContext)context,node); + } + + public void handleCloseTagNode(ParseContext context, TagNode node) + throws IOException { + emit((ReplayParseContext)context,node); + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/CommentRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSContentRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSContentRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSContentRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,62 @@ +/* JSContentRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.JSTextHandler; +import org.htmlparser.nodes.TextNode; + +public class JSContentRule implements ReplayParseEventDelegatorVisitor, JSTextHandler { + private StringTransformer transformer; + + public void visit(ReplayParseEventDelegator rules) { + rules.getModifyDelegator().addJSTextHandler(this); + } + + public void handleJSTextNode(ParseContext context, TextNode node) + throws IOException { + node.setText(transformer.transform((ReplayParseContext)context, node.getText())); + } + + /** + * @return the transformer + */ + public StringTransformer getTransformer() { + return transformer; + } + + /** + * @param transformer the transformer to set + */ + public void setTransformer(StringTransformer transformer) { + this.transformer = transformer; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSContentRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSPExecRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSPExecRule.java (rev 0) 
+++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSPExecRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,72 @@ +/* JSPExecRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.JSPExecutor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.htmlparser.Node; + +public class JSPExecRule { + private String jspPath = null; + + public void emit(ReplayParseContext context, Node node) throws ServletException, IOException { + JSPExecutor jspExec = context.getJspExec(); + if(jspExec != null) { + OutputStream os = context.getOutputStream(); + if(os != null) { + String jspResult = jspExec.jspToString(jspPath); + byte[] bytes = null; + try { + bytes = jspResult.getBytes(context.getOutputCharset()); + } catch(UnsupportedEncodingException e) { + e.printStackTrace(); + bytes = jspResult.getBytes(); + } + os.write(bytes); + } + } + } + + /** + * @return the jspPath + */ + public String 
getJspPath() { + return jspPath; + } + + /** + * @param jspPath the jspPath to set + */ + public void setJspPath(String jspPath) { + this.jspPath = jspPath; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/JSPExecRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/RawNodeRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/RawNodeRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/RawNodeRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,53 @@ +/* RawNodeRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.htmlparser.Node; + +public class RawNodeRule { + + public void emit(ReplayParseContext context, Node node) throws ServletException, + IOException { + OutputStream os = context.getOutputStream(); + if(os != null) { + String charset = context.getOutputCharset(); + String rawHTML = node.toHtml(true); + try { + os.write(rawHTML.getBytes(charset)); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + os.write(rawHTML.getBytes()); + } + } + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/RawNodeRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StaticStringRule.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StaticStringRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StaticStringRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,56 @@ +/* StaticStringRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. 
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; +import java.io.OutputStream; + +import javax.servlet.ServletException; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.htmlparser.Node; + +public class StaticStringRule { + public String text; + public void emit(ReplayParseContext context, Node node) throws ServletException, + IOException { + OutputStream os = context.getOutputStream(); + if(os != null) { + os.write(text.getBytes(context.getOutputCharset())); + } + } + /** + * @return the text + */ + public String getText() { + return text; + } + /** + * @param text the text to set + */ + public void setText(String text) { + this.text = text; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StaticStringRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StyleContentRule.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StyleContentRule.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StyleContentRule.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,60 @@ +/* StyleContentRule + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.rules; + +import java.io.IOException; + +import org.archive.wayback.replay.html.ReplayParseEventDelegator; +import org.archive.wayback.replay.html.ReplayParseEventDelegatorVisitor; +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; +import org.archive.wayback.util.htmllex.ParseContext; +import org.archive.wayback.util.htmllex.handlers.CSSTextHandler; +import org.htmlparser.nodes.TextNode; + +public class StyleContentRule implements ReplayParseEventDelegatorVisitor, CSSTextHandler { + private StringTransformer transformer; + + public void visit(ReplayParseEventDelegator rules) { + rules.getModifyDelegator().addCSSTextHandler(this); + } + public void handleCSSTextNode(ParseContext context, TextNode node) + 
throws IOException { + node.setText(transformer.transform((ReplayParseContext)context, node.getText())); + } + /** + * @return the transformer + */ + public StringTransformer getTransformer() { + return transformer; + } + + /** + * @param transformer the transformer to set + */ + public void setTransformer(StringTransformer transformer) { + this.transformer = transformer; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/rules/StyleContentRule.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseCSSStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseCSSStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseCSSStringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,84 @@ +/* URLStringTransformer + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.transformer; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.archive.wayback.replay.html.ReplayParseContext; + +public abstract class BaseCSSStringTransformer { + // this looks for "url(ZZZ)" + protected static String cssUrlPatString = + "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; +// protected static String cssUrlPatString = +// "url\\s*\\(\\s*([^\\)]*)\\s*\\)"; + + // this looks for various forms of "@import ZZZ" where "ZZZ" may or may not + // have quotes and parenths around it.. + // this regex is not supposed to match the (correct) @import url(ZZZ) form, + // which is handled by the more generic "url(ZZZ)" pattern + protected static String cssImportNoUrlPatString = + "@import\\s+(('[^']+')|(\"[^\"]+\")|(\\('[^']+'\\))|(\\(\"[^\"]+\"\\))|(\\([^)]+\\))|([a-z0-9_.:/\\\\-]+))\\s*;"; + + protected static Pattern cssImportNoUrlPattern = Pattern + .compile(cssImportNoUrlPatString); + + protected static Pattern cssUrlPattern = Pattern.compile(cssUrlPatString); + + protected void patternRewrite(ReplayParseContext context, StringBuilder sb, + Pattern pattern, String flags) { + int idx = 0; + Matcher urlMatcher = pattern.matcher(sb); + while (urlMatcher.find(idx)) { + String url = urlMatcher.group(1); + int origUrlLength = url.length(); + int urlStart = urlMatcher.start(1); + int urlEnd = urlMatcher.end(1); + idx = urlEnd; + if ((url.charAt(0) == '(') + && (url.charAt(origUrlLength-1) == ')')) { + url = url.substring(1, origUrlLength - 1); + urlStart += 1; + origUrlLength -= 2; + } + if (url.charAt(0) == '"') { + url = url.substring(1, origUrlLength - 1); + urlStart += 1; + } else if (url.charAt(0) == '\'') { + url = url.substring(1, origUrlLength - 1); + urlStart += 1; + } else if 
(url.charAt(0) == '\\') { + url = url.substring(2, origUrlLength - 2); + urlStart += 2; + } + int urlLength = url.length(); + String replayUrl = context.contextualizeUrl(url, flags); + int delta = replayUrl.length() - urlLength; + sb.replace(urlStart, urlStart + urlLength, replayUrl); + idx += delta; + } + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseCSSStringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseHrefStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseHrefStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseHrefStringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,43 @@ +/* URLStringTransformer + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.transformer; + +import java.net.MalformedURLException; +import java.net.URL; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; + +public class BaseHrefStringTransformer implements StringTransformer { + + public String transform(ReplayParseContext context, String input) { + try { + context.setBaseUrl(new URL(input)); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + return input; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BaseHrefStringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BlockCSSStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BlockCSSStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BlockCSSStringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,41 @@ +/* URLStringTransformer + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.transformer; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; + +public class BlockCSSStringTransformer extends BaseCSSStringTransformer implements StringTransformer { + + public String transform(ReplayParseContext context, String css) { + StringBuilder sb = new StringBuilder(css); + patternRewrite((ReplayParseContext)context, sb,cssUrlPattern, null); + patternRewrite((ReplayParseContext)context, sb,cssImportNoUrlPattern, + "cs_"); +// return "__BCSS__" + sb.toString() + "__BCSS__"; + return sb.toString(); + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/BlockCSSStringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/IdentityStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/IdentityStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/IdentityStringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,35 @@ +/* URLStringTransformer + * + * $Id$ + * + * Created on 12:36:59 
PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay.html.transformer; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; + +public class IdentityStringTransformer implements StringTransformer { + + public String transform(ReplayParseContext context, String input) { + return input; + } +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/IdentityStringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/InlineCSSStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/InlineCSSStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/InlineCSSStringTransformer.java 2009-11-05 23:06:38 UTC (rev 2880) @@ -0,0 +1,38 @@ +/* 
URLStringTransformer + * + * $Id$ + * + * Created on 12:36:59 PM Nov 5, 2009. + * + * Copyright (C) 2008 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of t... [truncated message content] |