From: <bra...@us...> - 2007-07-26 21:53:52
|
Revision: 1895 http://archive-access.svn.sourceforge.net/archive-access/?rev=1895&view=rev Author: bradtofel Date: 2007-07-26 14:53:47 -0700 (Thu, 26 Jul 2007) Log Message: ----------- REFACTOR: moved TagMagix from org.archive.wayback.archivalurl to org.archive.wayback.replay, as it is used by several Replay Renderers. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -39,6 +39,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.BaseReplayRenderer; +import org.archive.wayback.replay.TagMagix; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.util.UrlCanonicalizer; Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -1,286 +0,0 @@ -/* TagMagix - * - * $Id$ - * - * Created on 5:17:27 PM Feb 14, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.util.UrlCanonicalizer; - -/** - * Library for updating arbitrary attributes in arbitrary tags to rewrite HTML - * documents so URI references point back into the Wayback Machine. Attempts to - * make minimal changes so nothing gets broken during this process. - * - * @author brad - * @version $Date$, $Revision: - * 1668 $ - */ -public class TagMagix { - - private static HashMap<String, Pattern> pcPatterns = - new HashMap<String, Pattern>(); - - private static HashMap<String, Pattern> wholeTagPatterns = - new HashMap<String, Pattern>(); - - private static HashMap<String, Pattern> attrPatterns = - new HashMap<String, Pattern>(); - - private static String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")"; - - private static String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")"; - - private static String APOSED_ATTR_VALUE = "(?:'[^'>]*')"; - - private static String RAW_ATTR_VALUE = "(?:[^ \\t\\n\\x0B\\f\\r>\"']+)"; - - private static String ANY_ATTR_VALUE = QUOTED_ATTR_VALUE + "|" - + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" - + RAW_ATTR_VALUE; - - /** - * get (and cache) a regex Pattern for locating an HTML attribute value - * within a particular tag. if found, the pattern will have the attribute - * value in group 1. Note that the attribute value may contain surrounding - * apostrophe(') or quote(") characters. - * - * @param tagName - * @param attrName - * @return Pattern to match the tag-attribute's value - */ - private synchronized static Pattern getPattern(String tagName, - String attrName) { - - String key = tagName + " " + attrName; - Pattern pc = pcPatterns.get(key); - if (pc == null) { - - String tagPatString = "<\\s*" + tagName + "\\s+[^>]*\\b" + attrName - + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; - - pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); - pcPatterns.put(key, pc); - } - return pc; - } - - /** - * get (and cache) a regex Pattern for locating an entire HTML start tag. - * - * @param tagName - * @return Pattern to match the tag - */ - private synchronized static Pattern getWholeTagPattern(String tagName) { - - Pattern pc = wholeTagPatterns.get(tagName); - if (pc == null) { - - String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>"; - - pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); - wholeTagPatterns.put(tagName, pc); - } - return pc; - } - - /** - * get (and cache) a regex Pattern for locating an attribute value within an - * HTML start tag. If this pattern matches, the attribute value will be in - * group(1), and will include surrounding quotes, or apos, if they were - * present in the original HTML. - * - * @param attrName - * @return Pattern to match the attributes value - */ - private synchronized static Pattern getAttrPattern(String attrName) { - - Pattern pc = attrPatterns.get(attrName); - if (pc == null) { - - String attrPatString = "\\b" + attrName + "\\s*=\\s*(" - + ANY_ATTR_VALUE + ")(?:\\s|>)?"; - - pc = Pattern.compile(attrPatString, Pattern.CASE_INSENSITIVE); - attrPatterns.put(attrName, pc); - } - return pc; - } - - /** - * Alter the HTML document in page, updating URLs in the attrName attributes - * of all tagName tags such that: - * - * 1) absolute URLs are prefixed with: wmPrefix + pageTS 2) server-relative - * URLs are prefixed with: wmPrefix + pageTS + (host of page) 3) - * path-relative URLs are prefixed with: wmPrefix + pageTS + (attribute URL - * resolved against pageUrl) - * - * @param page - * @param uriConverter - * @param captureDate - * @param baseUrl which must be absolute - * @param tagName - * @param attrName - */ - public static void markupTagREURIC(StringBuilder page, - ResultURIConverter uriConverter, String captureDate, - String baseUrl, String tagName, String attrName) { - - Pattern tagPat = getPattern(tagName, attrName); - Matcher matcher = tagPat.matcher(page); - - int idx = 0; - while (matcher.find(idx)) { - String url = matcher.group(1); - int origUrlLength = url.length(); - int attrStart = matcher.start(1); - int attrEnd = matcher.end(1); - String quote = ""; - if (url.charAt(0) == '"') { - quote = "\""; - url = url.substring(1, url.length() - 1); - } else if (url.charAt(0) == '\'') { - quote = "'"; - url = url.substring(1, url.length() - 1); - } else if (url.charAt(0) == '\\') { - quote = "\\\""; - url = url.substring(2, url.length() - 2); - } - String finalUrl = UrlCanonicalizer.resolveUrl(baseUrl,url); - String replayUrl = quote - + uriConverter.makeReplayURI(captureDate, finalUrl) + quote; - - int delta = replayUrl.length() - origUrlLength; - page.replace(attrStart, attrEnd, replayUrl); - idx = attrEnd + delta; - } - } - - private static String trimAttrValue(String value) { - if (value.charAt(0) == '"') { - value = value.substring(1, value.length() - 1); - } else if (value.charAt(0) == '\'') { - value = value.substring(1, value.length() - 1); - } - return value; - } - - /** - * find and return the ATTR value within a TAG tag inside the HTML document - * within the StringBuffer page. returns null if no TAG-ATTR is found. - * - * @param page - * @param tag - * @param attr - * @return URL of base-href within page, or null if none is found. - */ - public static String getTagAttr(StringBuilder page, final String tag, - final String attr) { - - String found = null; - Pattern daPattern = TagMagix.getPattern(tag, attr); - Matcher matcher = daPattern.matcher(page); - int idx = 0; - - if (matcher.find(idx)) { - found = matcher.group(1); - found = trimAttrValue(found); - } - - return found; - } - - /** - * Search through the HTML contained in page, returning the value of a - * particular attribute. This version allows matching only tags that contain - * a particular attribute-value pair, which is useful in extracting META tag - * values, for example, in returning the value of the "content" attribute in - * a META tag that also contains an attribute "http-equiv" with a value of - * "Content-Type". All comparision is case-insensitive, but the value - * returned is the original attribute value, as unmolested as possible. - * - * If nothing matches, returns null. - * - * - * @param page - * StringBuilding holding HTML - * @param tag - * String containing tagname of interest - * @param findAttr - * name of attribute within the tag to return - * @param whereAttr - * only match tags with an attribute whereAttr - * @param whereVal - * only match tags with whereAttr having this value - * @return the value of attribute attr in tag where the tag also contains an - * attribute whereAttr, with value whereVal, or null if nothing - * matches. - */ - public static String getTagAttrWhere(StringBuilder page, final String tag, - final String findAttr, final String whereAttr, final String whereVal) { - - Pattern tagPattern = TagMagix.getWholeTagPattern(tag); - Pattern findAttrPattern = getAttrPattern(findAttr); - Pattern whereAttrPattern = getAttrPattern(whereAttr); - Matcher tagMatcher = tagPattern.matcher(page); - - while (tagMatcher.find()) { - String wholeTag = tagMatcher.group(); - Matcher whereAttrMatcher = whereAttrPattern.matcher(wholeTag); - if (whereAttrMatcher.find()) { - String attrValue = whereAttrMatcher.group(1); - attrValue = trimAttrValue(attrValue); - if (attrValue.compareToIgnoreCase(whereVal) == 0) { - // this tag contains the right set, return the value for - // the attribute findAttr: - Matcher findAttrMatcher = findAttrPattern.matcher(wholeTag); - String value = null; - if (findAttrMatcher.find()) { - value = findAttrMatcher.group(1); - value = trimAttrValue(value); - } - return value; - } - // not the tag we want... maybe there is another: loop - } - } - - return null; - } - - /** - * find and return the href value within a BASE tag inside the HTML document - * within the StringBuffer page. returns null if no BASE-HREF is found. - * - * @param page - * @return URL of base-href within page, or null if none is found. - */ - public static String getBaseHref(StringBuilder page) { - return getTagAttr(page, "BASE", "HREF"); - } -} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -42,7 +42,6 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.archivalurl.TagMagix; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.UIResults; Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java (from rev 1766, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -0,0 +1,286 @@ +/* TagMagix + * + * $Id$ + * + * Created on 5:17:27 PM Feb 14, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * Library for updating arbitrary attributes in arbitrary tags to rewrite HTML + * documents so URI references point back into the Wayback Machine. Attempts to + * make minimal changes so nothing gets broken during this process. + * + * @author brad + * @version $Date$, $Revision: + * 1668 $ + */ +public class TagMagix { + + private static HashMap<String, Pattern> pcPatterns = + new HashMap<String, Pattern>(); + + private static HashMap<String, Pattern> wholeTagPatterns = + new HashMap<String, Pattern>(); + + private static HashMap<String, Pattern> attrPatterns = + new HashMap<String, Pattern>(); + + private static String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")"; + + private static String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")"; + + private static String APOSED_ATTR_VALUE = "(?:'[^'>]*')"; + + private static String RAW_ATTR_VALUE = "(?:[^ \\t\\n\\x0B\\f\\r>\"']+)"; + + private static String ANY_ATTR_VALUE = QUOTED_ATTR_VALUE + "|" + + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" + + RAW_ATTR_VALUE; + + /** + * get (and cache) a regex Pattern for locating an HTML attribute value + * within a particular tag. if found, the pattern will have the attribute + * value in group 1. Note that the attribute value may contain surrounding + * apostrophe(') or quote(") characters. + * + * @param tagName + * @param attrName + * @return Pattern to match the tag-attribute's value + */ + private synchronized static Pattern getPattern(String tagName, + String attrName) { + + String key = tagName + " " + attrName; + Pattern pc = pcPatterns.get(key); + if (pc == null) { + + String tagPatString = "<\\s*" + tagName + "\\s+[^>]*\\b" + attrName + + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + + pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); + pcPatterns.put(key, pc); + } + return pc; + } + + /** + * get (and cache) a regex Pattern for locating an entire HTML start tag. + * + * @param tagName + * @return Pattern to match the tag + */ + private synchronized static Pattern getWholeTagPattern(String tagName) { + + Pattern pc = wholeTagPatterns.get(tagName); + if (pc == null) { + + String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>"; + + pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); + wholeTagPatterns.put(tagName, pc); + } + return pc; + } + + /** + * get (and cache) a regex Pattern for locating an attribute value within an + * HTML start tag. If this pattern matches, the attribute value will be in + * group(1), and will include surrounding quotes, or apos, if they were + * present in the original HTML. + * + * @param attrName + * @return Pattern to match the attributes value + */ + private synchronized static Pattern getAttrPattern(String attrName) { + + Pattern pc = attrPatterns.get(attrName); + if (pc == null) { + + String attrPatString = "\\b" + attrName + "\\s*=\\s*(" + + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + + pc = Pattern.compile(attrPatString, Pattern.CASE_INSENSITIVE); + attrPatterns.put(attrName, pc); + } + return pc; + } + + /** + * Alter the HTML document in page, updating URLs in the attrName attributes + * of all tagName tags such that: + * + * 1) absolute URLs are prefixed with: wmPrefix + pageTS 2) server-relative + * URLs are prefixed with: wmPrefix + pageTS + (host of page) 3) + * path-relative URLs are prefixed with: wmPrefix + pageTS + (attribute URL + * resolved against pageUrl) + * + * @param page + * @param uriConverter + * @param captureDate + * @param baseUrl which must be absolute + * @param tagName + * @param attrName + */ + public static void markupTagREURIC(StringBuilder page, + ResultURIConverter uriConverter, String captureDate, + String baseUrl, String tagName, String attrName) { + + Pattern tagPat = getPattern(tagName, attrName); + Matcher matcher = tagPat.matcher(page); + + int idx = 0; + while (matcher.find(idx)) { + String url = matcher.group(1); + int origUrlLength = url.length(); + int attrStart = matcher.start(1); + int attrEnd = matcher.end(1); + String quote = ""; + if (url.charAt(0) == '"') { + quote = "\""; + url = url.substring(1, url.length() - 1); + } else if (url.charAt(0) == '\'') { + quote = "'"; + url = url.substring(1, url.length() - 1); + } else if (url.charAt(0) == '\\') { + quote = "\\\""; + url = url.substring(2, url.length() - 2); + } + String finalUrl = UrlCanonicalizer.resolveUrl(baseUrl,url); + String replayUrl = quote + + uriConverter.makeReplayURI(captureDate, finalUrl) + quote; + + int delta = replayUrl.length() - origUrlLength; + page.replace(attrStart, attrEnd, replayUrl); + idx = attrEnd + delta; + } + } + + private static String trimAttrValue(String value) { + if (value.charAt(0) == '"') { + value = value.substring(1, value.length() - 1); + } else if (value.charAt(0) == '\'') { + value = value.substring(1, value.length() - 1); + } + return value; + } + + /** + * find and return the ATTR value within a TAG tag inside the HTML document + * within the StringBuffer page. returns null if no TAG-ATTR is found. + * + * @param page + * @param tag + * @param attr + * @return URL of base-href within page, or null if none is found. + */ + public static String getTagAttr(StringBuilder page, final String tag, + final String attr) { + + String found = null; + Pattern daPattern = TagMagix.getPattern(tag, attr); + Matcher matcher = daPattern.matcher(page); + int idx = 0; + + if (matcher.find(idx)) { + found = matcher.group(1); + found = trimAttrValue(found); + } + + return found; + } + + /** + * Search through the HTML contained in page, returning the value of a + * particular attribute. This version allows matching only tags that contain + * a particular attribute-value pair, which is useful in extracting META tag + * values, for example, in returning the value of the "content" attribute in + * a META tag that also contains an attribute "http-equiv" with a value of + * "Content-Type". All comparision is case-insensitive, but the value + * returned is the original attribute value, as unmolested as possible. + * + * If nothing matches, returns null. + * + * + * @param page + * StringBuilding holding HTML + * @param tag + * String containing tagname of interest + * @param findAttr + * name of attribute within the tag to return + * @param whereAttr + * only match tags with an attribute whereAttr + * @param whereVal + * only match tags with whereAttr having this value + * @return the value of attribute attr in tag where the tag also contains an + * attribute whereAttr, with value whereVal, or null if nothing + * matches. + */ + public static String getTagAttrWhere(StringBuilder page, final String tag, + final String findAttr, final String whereAttr, final String whereVal) { + + Pattern tagPattern = TagMagix.getWholeTagPattern(tag); + Pattern findAttrPattern = getAttrPattern(findAttr); + Pattern whereAttrPattern = getAttrPattern(whereAttr); + Matcher tagMatcher = tagPattern.matcher(page); + + while (tagMatcher.find()) { + String wholeTag = tagMatcher.group(); + Matcher whereAttrMatcher = whereAttrPattern.matcher(wholeTag); + if (whereAttrMatcher.find()) { + String attrValue = whereAttrMatcher.group(1); + attrValue = trimAttrValue(attrValue); + if (attrValue.compareToIgnoreCase(whereVal) == 0) { + // this tag contains the right set, return the value for + // the attribute findAttr: + Matcher findAttrMatcher = findAttrPattern.matcher(wholeTag); + String value = null; + if (findAttrMatcher.find()) { + value = findAttrMatcher.group(1); + value = trimAttrValue(value); + } + return value; + } + // not the tag we want... maybe there is another: loop + } + } + + return null; + } + + /** + * find and return the href value within a BASE tag inside the HTML document + * within the StringBuffer page. returns null if no BASE-HREF is found. + * + * @param page + * @return URL of base-href within page, or null if none is found. + */ + public static String getBaseHref(StringBuilder page) { + return getTagAttr(page, "BASE", "HREF"); + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -34,11 +34,11 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.archivalurl.JSReplayRenderer; -import org.archive.wayback.archivalurl.TagMagix; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.TagMagix; import org.archive.wayback.util.StringFormatter; /** Deleted: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -1,293 +0,0 @@ -/* TagMagixTest - * - * $Id$ - * - * Created on 6:36:07 PM Feb 14, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class TagMagixTest extends TestCase { - - // snipped and modified from http://www.sudaneseonline.com/ on 20070418... - // note: leading space in description META content - // note: added newlines in Content-Language META tag - // note: no quotes around Author META content - - String thePage = "<html>\n" + - "<head>\n" + - "<meta http-equiv=\"Content-Language\" \n content=\"ar-eg\">\n" + - "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + - "<meta name=\"resource-type\" content=\"document\">\n" + - "<meta name=\"classification\" content=\"News\">\n" + - "<meta name=\"test1234\" content=\"one\ntwo\">\n" + - "<meta name=\"description\" content=\" A voice of the Sudan people on the Internet\">\n" + - - "<meta http-equiv=\"Content-Language\" \n content=\"ar-sa\">\n" + - "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + - "<META NAME=\"Author\" CONTENT=Bakri Abubakr http://bayanit.com/>\n" + - "<META NAME=\"Author2\" CONTENT=\"Bakri Abubakr http://bayanit.com/\">\n" + - "</head>\n" + - "<body>foo</body>\n" + - "</html>\n"; - - /** - * Tests the code that finds attribute values in tags - */ - public void testFindAttr() { - - checkAttrValue(thePage,"meta","http-equiv","Content-Language"); - } - /** - * - */ - public void testFindAttrWhere() { - checkAttrWhereValue(thePage,"meta","content","http-equiv", - "Content-Type","text/html; charset=windows-1256"); - - checkAttrWhereValue(thePage,"meta","content","http-equiv", - "Content-Language","ar-eg"); - - checkAttrWhereValue(thePage,"meta","content","name", - "classification","News"); - - checkAttrWhereValue(thePage,"meta","content","name", - "test1234","one\ntwo"); - - checkAttrWhereValue(thePage,"meta","content","name", - "ClAsSification","News"); - - checkAttrWhereValue(thePage,"meta","content","name", - "description"," A voice of the Sudan people on the Internet"); - - checkAttrWhereValue(thePage,"meta","content","name", - "description-no-existo",null); - - checkAttrWhereValue(thePage,"meta","content","name", - "author","Bakri"); - - checkAttrWhereValue(thePage,"meta","content","name", - "author2","Bakri Abubakr http://bayanit.com/"); - } - - - /** - * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' - */ - public void testMarkupTag() { - - - // simple simple -- no quotes at all - checkMarkup( - "<A HREF=http://goofy.com/>", - "<A HREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // same test with lower case - checkMarkup( - "<a href=http://goofy.com/>", - "<a href=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with funky mixed case - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // more funky mixed case, this time in the attribute to replace argument - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // another funky case permutation, this time in the tagname to replace - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "a","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with double quotes - checkMarkup( - "<A HREF=\"http://goofy.com/\">", - "<A HREF=\"http://web.archive.org/wayback/2004/http://goofy.com/\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // single quotes - checkMarkup( - "<A HREF='http://goofy.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags - checkMarkup( - "<A HREF='http://goofy.com/'><A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags with newline: - checkMarkup( - "<A HREF='http://goofy.com/'>\n<A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>\n<A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - - // two tags in "page" but only asking to update one of them - checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><B HREF='http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags, asking to update the other. - checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://goofy.com/'><B HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "B","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple path relative - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple server relative but irrelavant -- still at top level - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // server relative but with non directory base url - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // server relative being significant - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // path relative with non-directory base url - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // path relative in subdirectory - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/dir/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no closing apos) - checkMarkup( - "<A HREF='index.html>", - "<A HREF='index.html>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF='index.html\">", - "<A HREF='index.html\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // same as last, but reversed: don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF=\"index.html'>", - "<A HREF=\"index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newline in attribute - checkMarkup( - "<A HREF='/index.html'\n FOO='bar'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'\n FOO='bar'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute - checkMarkup( - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\">", - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\">", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case, no protocol - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://archive.org/dir/"); - - // Javascript escaped quote attribute: - checkMarkup( - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"/css/print.css\\\" />\");", - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"http://web.archive.org/wayback/2004/http://boogle.org/css/print.css\\\" />\");", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://boogle.org/dir/"); - - - } - - private void checkAttrValue(String page, String tag, String attr, - String wantValue) { - StringBuilder sb = new StringBuilder(page); - String foundValue = TagMagix.getTagAttr(sb, tag, attr); - assertEquals(foundValue,wantValue); - } - private void checkAttrWhereValue(String page, String tag, String attr, - String whereAttr, String whereVal, String wantValue) { - StringBuilder sb = new StringBuilder(page); - String foundValue = TagMagix.getTagAttrWhere(sb, tag, attr, whereAttr,whereVal); - if(foundValue != null) { - assertEquals(foundValue,wantValue); - } else { - assertNull(wantValue); - } - } - - private void checkMarkup(String orig, String want, String tag, String attr, String prefix, String ts, String url) { - StringBuilder buf = new StringBuilder(orig); -// if(url.startsWith("http://")) { -// url = url.substring(7); -// } - ArchivalUrlResultURIConverter uriC = new ArchivalUrlResultURIConverter(); - uriC.setReplayURIPrefix(prefix); - TagMagix.markupTagREURIC(buf,uriC,ts,url,tag,attr); - String marked = buf.toString(); - assertEquals(want,marked); - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java (from rev 1874, trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -0,0 +1,296 @@ +/* TagMagixTest + * + * $Id$ + * + * Created on 6:36:07 PM Feb 14, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import org.archive.wayback.replay.TagMagix; +import org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class TagMagixTest extends TestCase { + + // snipped and modified from http://www.sudaneseonline.com/ on 20070418... + // note: leading space in description META content + // note: added newlines in Content-Language META tag + // note: no quotes around Author META content + + String thePage = "<html>\n" + + "<head>\n" + + "<meta http-equiv=\"Content-Language\" \n content=\"ar-eg\">\n" + + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + + "<meta name=\"resource-type\" content=\"document\">\n" + + "<meta name=\"classification\" content=\"News\">\n" + + "<meta name=\"test1234\" content=\"one\ntwo\">\n" + + "<meta name=\"description\" content=\" A voice of the Sudan people on the Internet\">\n" + + + "<meta http-equiv=\"Content-Language\" \n content=\"ar-sa\">\n" + + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + + "<META NAME=\"Author\" CONTENT=Bakri Abubakr http://bayanit.com/>\n" + + "<META NAME=\"Author2\" CONTENT=\"Bakri Abubakr http://bayanit.com/\">\n" + + "</head>\n" + + "<body>foo</body>\n" + + "</html>\n"; + + /** + * Tests the code that finds attribute values in tags + */ + public void testFindAttr() { + + checkAttrValue(thePage,"meta","http-equiv","Content-Language"); + } + /** + * + */ + public void testFindAttrWhere() { + checkAttrWhereValue(thePage,"meta","content","http-equiv", + "Content-Type","text/html; charset=windows-1256"); + + checkAttrWhereValue(thePage,"meta","content","http-equiv", + "Content-Language","ar-eg"); + + checkAttrWhereValue(thePage,"meta","content","name", + "classification","News"); + + checkAttrWhereValue(thePage,"meta","content","name", + "test1234","one\ntwo"); + + checkAttrWhereValue(thePage,"meta","content","name", + "ClAsSification","News"); + + checkAttrWhereValue(thePage,"meta","content","name", + "description"," A voice of the Sudan people on the Internet"); + + checkAttrWhereValue(thePage,"meta","content","name", + "description-no-existo",null); + + checkAttrWhereValue(thePage,"meta","content","name", + "author","Bakri"); + + checkAttrWhereValue(thePage,"meta","content","name", + "author2","Bakri Abubakr http://bayanit.com/"); + } + + + /** + * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' + */ + public void testMarkupTag() { + + + // simple simple -- no quotes at all + checkMarkup( + "<A HREF=http://goofy.com/>", + "<A HREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // same test with lower case + checkMarkup( + "<a href=http://goofy.com/>", + "<a href=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // with funky mixed case + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // more funky mixed case, this time in the attribute to replace argument + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // another funky case permutation, this time in the tagname to replace + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "a","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // with double quotes + checkMarkup( + "<A HREF=\"http://goofy.com/\">", + "<A HREF=\"http://web.archive.org/wayback/2004/http://goofy.com/\">", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // single quotes + checkMarkup( + "<A HREF='http://goofy.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags + checkMarkup( + "<A HREF='http://goofy.com/'><A HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags with newline: + checkMarkup( + "<A HREF='http://goofy.com/'>\n<A HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>\n<A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + + // two tags in "page" but only asking to update one of them + checkMarkup( + "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><B HREF='http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags, asking to update the other. + checkMarkup( + "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", + "<A HREF='http://goofy.com/'><B HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "B","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // simple path relative + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // simple server relative but irrelavant -- still at top level + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // server relative but with non directory base url + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); + + // server relative being significant + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // path relative with non-directory base url + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); + + // path relative in subdirectory + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/dir/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // don't touch a "malformed" attribute (no closing apos) + checkMarkup( + "<A HREF='index.html>", + "<A HREF='index.html>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // don't touch a "malformed" attribute (no differing quotes around attribute.) + checkMarkup( + "<A HREF='index.html\">", + "<A HREF='index.html\">", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // same as last, but reversed: don't touch a "malformed" attribute (no differing quotes around attribute.) + checkMarkup( + "<A HREF=\"index.html'>", + "<A HREF=\"index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newline in attribute + checkMarkup( + "<A HREF='/index.html'\n FOO='bar'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'\n FOO='bar'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute + checkMarkup( + "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\">", + "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\">", + "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", + "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra, diff case + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra, diff case, no protocol + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://archive.org/_style/style.css\"></b>", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://archive.org/dir/"); + + // Javascript escaped quote attribute: + checkMarkup( + "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"/css/print.css\\\" />\");", + "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"http://web.archive.org/wayback/2004/http://boogle.org/css/print.css\\\" />\");", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://boogle.org/dir/"); + + + } + + private void checkAttrValue(String page, String tag, String attr, + String wantValue) { + StringBuilder sb = new StringBuilder(page); + String foundValue = TagMagix.getTagAttr(sb, tag, attr); + assertEquals(foundValue,wantValue); + } + private void checkAttrWhereValue(String page, String tag, String attr, + String whereAttr, String whereVal, String wantValue) { + StringBuilder sb = new StringBuilder(page); + String foundValue = TagMagix.getTagAttrWhere(sb, tag, attr, whereAttr,whereVal); + if(foundValue != null) { + assertEquals(foundValue,wantValue); + } else { + assertNull(wantValue); + } + } + + private void checkMarkup(String orig, String want, String tag, String attr, String prefix, String ts, String url) { + StringBuilder buf = new StringBuilder(orig); +// if(url.startsWith("http://")) { +// url = url.substring(7); +// } + ArchivalUrlResultURIConverter uriC = new ArchivalUrlResultURIConverter(); + uriC.setReplayURIPrefix(prefix); + TagMagix.markupTagREURIC(buf,uriC,ts,url,tag,attr); + String marked = buf.toString(); + assertEquals(want,marked); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |