Revision: 2989 http://archive-access.svn.sourceforge.net/archive-access/?rev=2989&view=rev Author: bradtofel Date: 2010-03-20 01:11:18 +0000 (Sat, 20 Mar 2010) Log Message: ----------- FEATURE: transformer for META refresh tags Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/MetaRefreshUrlStringTransformer.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/MetaRefreshUrlStringTransformer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/MetaRefreshUrlStringTransformer.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/MetaRefreshUrlStringTransformer.java 2010-03-20 01:11:18 UTC (rev 2989) @@ -0,0 +1,78 @@ +/* MetaRefreshUrlStringTransformer + * + * $Id$: + * + * Created on Jan 12, 2010. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of Wayback. + * + * Wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * Wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with Wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package org.archive.wayback.replay.html.transformer; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.archive.wayback.replay.html.ReplayParseContext; +import org.archive.wayback.replay.html.StringTransformer; + +/** + * @author brad + * + */ +public class MetaRefreshUrlStringTransformer extends URLStringTransformer +implements StringTransformer { + + private final static Pattern refreshURLPattern = + Pattern.compile("^\\d+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$", + Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); + + /* (non-Javadoc) + * @see org.archive.wayback.replay.html.StringTransformer#transform(org.archive.wayback.replay.html.ReplayParseContext, java.lang.String) + */ + public String transform(ReplayParseContext context, String input) { + /* + <META + HTTP-EQUIV="Refresh" + CONTENT="0; URL=/ics/default.asp"> + + Our argument "input" is set to the value of the "CONTENT" attribute. + + So, we need to search for the "URL=", take everything to the right + of that, trim it, contextualize it, and return that. + */ + Matcher m = refreshURLPattern.matcher(input); + if(m.matches()) { + if(m.groupCount() == 1) { + StringBuilder sb = new StringBuilder(input.length() * 2); + + sb.append(input.substring(0,m.start(1))); + + sb.append(super.transform(context, m.group(1))); + + // This was temporarily used for testing the regex: +// sb.append("(((").append(m.group(1)).append(")))"); + + sb.append(input.substring(m.end(1))); + return sb.toString(); + } + } + return input; + } + +} Property changes on: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/html/transformer/MetaRefreshUrlStringTransformer.java ___________________________________________________________________ Added: svn:keywords + Author Date Revision Id This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |