From: <al...@us...> - 2008-03-04 04:45:58
|
Revision: 2214 http://archive-access.svn.sourceforge.net/archive-access/?rev=2214&view=rev Author: alexoz Date: 2008-03-03 20:45:10 -0800 (Mon, 03 Mar 2008) Log Message: ----------- * AccessControlClient.java, NewSurtTokenizer.java Allow specifying a SURT with a scheme. * NewSurtTokenizer.java, NewSurtTokenizerTest.java Added method to return a list of searches (to replace functionality of old SURT tokenizer). * RuleSet.java Changed to use new SURT tokenizer. * RuleSetTest.java Updated tests to reflect the new tokenizing behaviour. * SURTTokenizer2.java, SURTTokenizer2Test.java Removed old SURT tokenizer. Modified Paths: -------------- trunk/archive-access/projects/access-control/access-control/.classpath trunk/archive-access/projects/access-control/access-control/.project trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/accesscontrol/model/RuleSetTest.java trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java Removed Paths: ------------- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/SURTTokenizer2.java trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java Modified: trunk/archive-access/projects/access-control/access-control/.classpath =================================================================== --- trunk/archive-access/projects/access-control/access-control/.classpath 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/.classpath 2008-03-04 04:45:10 UTC (rev 
2214) @@ -4,33 +4,33 @@ <classpathentry kind="src" path="src/test/java" output="target/test-classes"/> <classpathentry kind="output" path="target/classes"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> - <classpathentry kind="var" path="M2_REPO/commons-lang/commons-lang/2.3/commons-lang-2.3.jar"/> - <classpathentry kind="var" path="M2_REPO/org/dnsjava/dnsjava/2.0.3/dnsjava-2.0.3.jar"/> - <classpathentry kind="var" path="M2_REPO/it/unimi/dsi/mg4j/1.0.1/mg4j-1.0.1.jar"/> + <classpathentry kind="var" path="M2_REPO/xpp3/xpp3_min/1.1.3.4.O/xpp3_min-1.1.3.4.O.jar"/> + <classpathentry kind="var" path="M2_REPO/fastutil/fastutil/5.0.7/fastutil-5.0.7.jar"/> + <classpathentry kind="var" path="M2_REPO/org/gnu/inet/libidn/0.6.5/libidn-0.6.5.jar"/> + <classpathentry kind="var" path="M2_REPO/com/lowagie/itext/1.3/itext-1.3.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-logging/commons-logging/1.0.4/commons-logging-1.0.4.jar"/> + <classpathentry kind="var" path="M2_REPO/net/htmlparser/jericho/jericho-html/2.3/jericho-html-2.3.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-cli/commons-cli/1.0/commons-cli-1.0.jar"/> + <classpathentry kind="var" path="M2_REPO/org/archive/overlays/archive-overlay-commons-pool/1.3/archive-overlay-commons-pool-1.3.jar"/> + <classpathentry kind="var" path="M2_REPO/oro/oro/2.0.8/oro-2.0.8.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1.jar"/> + <classpathentry kind="var" path="M2_REPO/junit/junit/4.4/junit-4.4.jar"/> + <classpathentry kind="var" path="M2_REPO/poi/poi-scratchpad/2.5.1-final-20040804/poi-scratchpad-2.5.1-final-20040804.jar"/> <classpathentry kind="var" path="M2_REPO/org/archive/heritrix/commons/2.0.0-RC1/commons-2.0.0-RC1.jar"/> <classpathentry kind="var" path="M2_REPO/org/codehaus/jettison/jettison/1.0-beta-1/jettison-1.0-beta-1.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-dbcp/commons-dbcp/1.2.2/commons-dbcp-1.2.2.jar"/> + 
<classpathentry kind="var" path="M2_REPO/commons-lang/commons-lang/2.3/commons-lang-2.3.jar"/> <classpathentry kind="var" path="M2_REPO/net/java/dev/jets3t/jets3t/0.5.0/jets3t-0.5.0.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-cli/commons-cli/1.0/commons-cli-1.0.jar"/> - <classpathentry kind="var" path="M2_REPO/stax/stax-api/1.0.1/stax-api-1.0.1.jar"/> - <classpathentry kind="var" path="M2_REPO/junit/junit/4.4/junit-4.4.jar"/> - <classpathentry kind="var" path="M2_REPO/org/archive/overlays/archive-overlay-commons-httpclient/3.1/archive-overlay-commons-httpclient-3.1.jar"/> - <classpathentry kind="var" path="M2_REPO/org/archive/overlays/archive-overlay-commons-pool/1.3/archive-overlay-commons-pool-1.3.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/1.3.1/commons-io-1.3.1.jar"/> - <classpathentry kind="var" path="M2_REPO/poi/poi-scratchpad/2.5.1-final-20040804/poi-scratchpad-2.5.1-final-20040804.jar"/> - <classpathentry kind="var" path="M2_REPO/net/htmlparser/jericho/jericho-html/2.3/jericho-html-2.3.jar"/> - <classpathentry kind="var" path="M2_REPO/org/gnu/inet/libidn/0.6.5/libidn-0.6.5.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-pool/commons-pool/1.3/commons-pool-1.3.jar"/> + <classpathentry kind="var" path="M2_REPO/poi/poi/2.5.1-final-20040804/poi-2.5.1-final-20040804.jar"/> + <classpathentry kind="var" path="M2_REPO/com/thoughtworks/xstream/xstream/1.2.2/xstream-1.2.2.jar"/> <classpathentry kind="var" path="M2_REPO/com/anotherbigidea/javaswf/CVS-SNAPSHOT-1/javaswf-CVS-SNAPSHOT-1.jar"/> <classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.3/commons-codec-1.3.jar"/> + <classpathentry kind="var" path="M2_REPO/berkeleydb/je/3.2.44/je-3.2.44.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-pool/commons-pool/1.3/commons-pool-1.3.jar"/> + <classpathentry kind="var" path="M2_REPO/org/archive/overlays/archive-overlay-commons-httpclient/3.1/archive-overlay-commons-httpclient-3.1.jar"/> + 
<classpathentry kind="var" path="M2_REPO/it/unimi/dsi/mg4j/1.0.1/mg4j-1.0.1.jar"/> + <classpathentry kind="var" path="M2_REPO/org/dnsjava/dnsjava/2.0.3/dnsjava-2.0.3.jar"/> <classpathentry kind="var" path="M2_REPO/commons-collections/commons-collections/3.1/commons-collections-3.1.jar"/> - <classpathentry kind="var" path="M2_REPO/fastutil/fastutil/5.0.7/fastutil-5.0.7.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-logging/commons-logging/1.0.4/commons-logging-1.0.4.jar"/> - <classpathentry kind="var" path="M2_REPO/oro/oro/2.0.8/oro-2.0.8.jar"/> - <classpathentry kind="var" path="M2_REPO/poi/poi/2.5.1-final-20040804/poi-2.5.1-final-20040804.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-dbcp/commons-dbcp/1.2.2/commons-dbcp-1.2.2.jar"/> - <classpathentry kind="var" path="M2_REPO/xpp3/xpp3_min/1.1.3.4.O/xpp3_min-1.1.3.4.O.jar"/> - <classpathentry kind="var" path="M2_REPO/com/lowagie/itext/1.3/itext-1.3.jar"/> - <classpathentry kind="var" path="M2_REPO/berkeleydb/je/3.2.44/je-3.2.44.jar"/> - <classpathentry kind="var" path="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1.jar"/> - <classpathentry kind="var" path="M2_REPO/com/thoughtworks/xstream/xstream/1.2.2/xstream-1.2.2.jar"/> + <classpathentry kind="var" path="M2_REPO/stax/stax-api/1.0.1/stax-api-1.0.1.jar"/> + <classpathentry kind="var" path="M2_REPO/commons-io/commons-io/1.3.1/commons-io-1.3.1.jar"/> </classpath> \ No newline at end of file Modified: trunk/archive-access/projects/access-control/access-control/.project =================================================================== --- trunk/archive-access/projects/access-control/access-control/.project 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/.project 2008-03-04 04:45:10 UTC (rev 2214) @@ -12,11 +12,15 @@ <buildCommand> <name>org.eclipse.wst.validation.validationbuilder</name> </buildCommand> + <buildCommand> + <name>org.maven.ide.eclipse.maven2Builder</name> + 
</buildCommand> </buildSpec> <natures> <nature>org.eclipse.wst.common.project.facet.core.nature</nature> <nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature> <nature>org.eclipse.jem.workbench.JavaEMFNature</nature> + <nature>org.maven.ide.eclipse.maven2Nature</nature> </natures> </projectDescription> \ No newline at end of file Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/AccessControlClient.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -101,10 +101,8 @@ String publicSuffix = PublicSuffixes .reduceSurtToTopmostAssigned(getSurtAuthority(surt)); - surt = stripScheme(surt); + RuleSet rules = ruleDao.getRuleTree(getScheme(surt) + "(" + publicSuffix); - RuleSet rules = ruleDao.getRuleTree("(" + publicSuffix); - Rule matchingRule = rules.getMatchingRule(surt, captureDate, retrievalDate, who); return matchingRule; @@ -142,13 +140,13 @@ return surt.substring(indexOfOpen + 4, indexOfClose); } - protected static String stripScheme(String surt) { + protected static String getScheme(String surt) { int i = surt.indexOf("://"); int j = surt.indexOf(":"); if (i >= 0 && i == j) { - return surt.substring(i + 3); + return surt.substring(0, i + 3); } else { - return surt; + return ""; } } Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java 
2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/RuleSet.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -5,8 +5,7 @@ import java.util.Iterator; import java.util.TreeSet; -import org.archive.surt.SURTTokenizer2; - +import org.archive.surt.NewSurtTokenizer; import sun.reflect.generics.reflectiveObjects.NotImplementedException; /** @@ -67,16 +66,10 @@ public Rule getMatchingRule(String surt, Date captureDate, Date retrievalDate, String who) { - SURTTokenizer2 tok = SURTTokenizer2.newFromSURT(surt); + NewSurtTokenizer tok = new NewSurtTokenizer(surt); boolean done = false; - while (!done) { - String key = tok.nextSearch(); - if (key == null) { - key = "("; - done = true; - } - + for (String key: tok.getSearchList()) { Iterable<Rule> rules = rulemap.get(key); if (rules != null) { for (Rule rule : rules) { Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -8,9 +8,9 @@ /** * The new SURT tokenizer breaks a SURT up into tokens. * - * For example "(org,archive,www,)/path/file.html?query#anchor" is broken up into: + * For example "http://(org,archive,www,)/path/file.html?query#anchor" is broken up into: * - * ["(" + * ["http://(" * "org," * "archive," * "www," @@ -76,9 +76,13 @@ return surtLength; } - // ROOT: "(..." + // Scheme: "http://(..." if (pos == 0) { - return 1; // "(" + int i = surt.indexOf('('); + if (i == -1) { + return preTabLength; + } + return i + 1; // "http://(" } // Host components: "foo,..." 
if (pos < endOfAuthority || endOfAuthority == -1) { @@ -150,4 +154,26 @@ return (String[]) toList().toArray(); } + /** + * Return a list of searches in order of decreasing length. For example + * given the surt "(org,archive,)/fishing" return: + * + * [ "(org,archive,)/fishing", + * "(org,archive,)/", + * "(org,archive,", + * "(org,", + * "(" + * ] + * @return + */ + public List<String> getSearchList() { + List<String> searches = new ArrayList<String>(); + String running = ""; + for (String token: this) { + running += token; + searches.add(0, running); + } + return searches; + } + } Deleted: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/SURTTokenizer2.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/SURTTokenizer2.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/SURTTokenizer2.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -1,201 +0,0 @@ -/* SURTTokenizer - * - * $Id: SURTTokenizer.java 4795 2006-12-12 23:42:09Z paul_jack $ - * - * Created on 3:21:49 PM May 11, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.surt; - -import java.util.Iterator; - -import org.apache.commons.httpclient.URIException; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; -import org.archive.util.ArchiveUtils; -import org.archive.util.SURT; - -import sun.reflect.generics.reflectiveObjects.NotImplementedException; - -/** - * provides iterative Url reduction for prefix matching to find ever coarser - * grained URL-specific configuration. Assumes that a prefix binary search is - * being attempted for each returned value. First value is the entire SURT - * url String, with TAB appended. Second removes CGI ARGs. Then each subsequent - * path segment ('/' separated) is removed. Then the login:password, if present - * is removed. Then the port, if not :80 or omitted on the initial URL. Then - * each subsequent authority segment(. separated) is removed. - * - * the nextSearch() method will return null, finally, when no broader searches - * can be attempted on the URL. 
- * - * @author brad - * @version $Date: 2006-12-12 15:42:09 -0800 (Tue, 12 Dec 2006) $, $Revision: 4795 $ - */ -public class SURTTokenizer2 { - - public final static String EXACT_SUFFIX = "\t"; - private String remainder; - private boolean triedExact; - private boolean triedFull; - private boolean choppedArgs; - private boolean choppedPath; - private boolean choppedLogin; - private boolean choppedPort; - - public SURTTokenizer2() { - } - - public void setSURT(String surt) { - remainder = getKeyFromSURT(surt, false); - } - - /** - * constructor - * - * @param url String URL - * @throws URIException - */ - public SURTTokenizer2(final String url) throws URIException { - remainder = getKey(url,false); - } - /** - * update internal state and return the next smaller search string - * for the url - * - * @return string to lookup for prefix match for relevant information. - */ - public String nextSearch() { - if(!triedExact) { - triedExact = true; - //remainder = remainder.substring(0,remainder.length()-1); - return remainder + EXACT_SUFFIX; - } - if(!triedFull) { - triedFull = true; - return remainder; - } - if(!choppedArgs) { - choppedArgs = true; - int argStart = remainder.indexOf('?'); - if(argStart != -1) { - remainder = remainder.substring(0,argStart); - return remainder; - } - } - if(!choppedPath) { - int lastSlash = remainder.lastIndexOf('/'); - if(lastSlash != -1) { - remainder = remainder.substring(0,lastSlash); - if(remainder.endsWith(")")) { - remainder = remainder.substring(0,remainder.length()-1); - } - return remainder; - } - choppedPath = true; - } - if(!choppedLogin) { - choppedLogin = true; - int lastAt = remainder.lastIndexOf('@'); - if(lastAt != -1) { - remainder = remainder.substring(0,lastAt); - if(remainder.endsWith(",")) { - remainder = remainder.substring(0,remainder.length()-1); - } - return remainder; - } - } - if(!choppedPort) { - choppedPort = true; - int lastColon = remainder.lastIndexOf(':'); - if(lastColon != -1) { - remainder = 
remainder.substring(0,lastColon); - if(remainder.endsWith(",")) { - remainder = remainder.substring(0,remainder.length()-1); - } - return remainder; - } - } - // now just remove ','s - int lastComma = remainder.lastIndexOf(','); - if(lastComma == -1) { - return null; - } - remainder = remainder.substring(0,lastComma); - return remainder; - } - - /** - * @param url - * @return String SURT which will match exactly argument url - * @throws URIException - */ - public static String exactKey(String url) throws URIException { - return getKey(url,false); - } - - /** - * @param url - * @return String SURT which will match urls prefixed with the argument url - * @throws URIException - */ - public static String prefixKey(String url) throws URIException { - return getKey(url,true); - } - - private static String getKey(String url, boolean prefix) - throws URIException { - - String key = ArchiveUtils.addImpliedHttpIfNecessary(url); - UURI uuri = UURIFactory.getInstance(key); - key = uuri.getScheme() + "://" + uuri.getAuthority() + - uuri.getEscapedPathQuery(); - - key = SURT.fromURI(key); - key = getKeyFromSURT(key, prefix); - return key; - } - - private static String getKeyFromSURT(String surtKey, boolean prefix) { - int hashPos = surtKey.indexOf('#'); - if(hashPos != -1) { - surtKey = surtKey.substring(0,hashPos); - } - - if(surtKey.startsWith("http://")) { - surtKey = surtKey.substring(7); - } - if(prefix) { - if(surtKey.endsWith(")/")) { - surtKey = surtKey.substring(0,surtKey.length()-2); - } - } - return surtKey; - } - - /** - * Create new SURTTokenizer from a SURT rather than a URL. 
- */ - public static SURTTokenizer2 newFromSURT(String surt) { - SURTTokenizer2 tok = new SURTTokenizer2(); - tok.setSURT(surt); - return tok; - } -} Modified: trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/accesscontrol/model/RuleSetTest.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/accesscontrol/model/RuleSetTest.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/accesscontrol/model/RuleSetTest.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -14,9 +14,9 @@ public void testSimple() { ruleset = new RuleSet(); ruleset.add(new Rule("robots", "(")); - ruleset.add(new Rule("allow", "(org,archive")); - ruleset.add(new Rule("block", "(org,archive,)/secret")); - ruleset.add(new Rule("allow", "(org,archive,)/secret/public")); + ruleset.add(new Rule("allow", "(org,archive,")); + ruleset.add(new Rule("block", "(org,archive,)/secret/")); + ruleset.add(new Rule("allow", "(org,archive,)/secret/public/")); for (Rule rule: ruleset) { System.out.println(rule.getSurt()); @@ -37,9 +37,9 @@ ruleset = new RuleSet(); ruleset.add(new Rule("robots", "(")); ruleset.add(new Rule("allow", "(")); - ruleset.add(new Rule("block", "(org,archive,)/secret")); - ruleset.add(new Rule("allow", "(org,archive,)/secret/public")); - ruleset.add(new Rule("block", "(org,archive,)/secret/public")); + ruleset.add(new Rule("block", "(org,archive,)/secret/")); + ruleset.add(new Rule("allow", "(org,archive,)/secret/public/")); + ruleset.add(new Rule("block", "(org,archive,)/secret/public/")); assertEquals("allow", ruleset.getMatchingRule("(org", new Date(), new Date(), null).getPolicy()); assertEquals("allow", ruleset.getMatchingRule("(org,archive,)/", new Date(), new Date(), null).getPolicy()); @@ -52,7 +52,7 @@ public void testEmbargo() { ruleset = new RuleSet(); ruleset.add(new 
Rule("robots", "(")); - ruleset.add(new Rule("allow", "(org,archive")); + ruleset.add(new Rule("allow", "(org,archive,")); ruleset.add(new Rule("block", "(org,archive,)/classified/")); ruleset.add(new Rule("allow", "(org,archive,)/classified/", 60 * 60 * 24 * 90)); @@ -70,9 +70,9 @@ public void testGroup() { ruleset = new RuleSet(); ruleset.add(new Rule("robots", "(")); - ruleset.add(new Rule("allow", "(org,archive", "archivists")); - ruleset.add(new Rule("block", "(org,archive,)/classified")); - ruleset.add(new Rule("allow", "(org,archive,)/classified", "admins")); + ruleset.add(new Rule("allow", "(org,archive,", "archivists")); + ruleset.add(new Rule("block", "(org,archive,)/classified/")); + ruleset.add(new Rule("allow", "(org,archive,)/classified/", "admins")); assertEquals("robots", ruleset.getMatchingRule("(org,archive,www,)/index.html", new Date(), new Date(), "dinosaurs").getPolicy()); assertEquals("allow", ruleset.getMatchingRule("(org,archive,www,)/index.html", new Date(), new Date(), "archivists").getPolicy()); Modified: trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -7,10 +7,10 @@ public class NewSurtTokenizerTest extends TestCase { public void testRoot() { - NewSurtTokenizer tok = new NewSurtTokenizer("("); + NewSurtTokenizer tok = new NewSurtTokenizer("http://("); Iterator<String> it = tok.iterator(); - assertEquals("(", it.next()); + assertEquals("http://(", it.next()); assertFalse(it.hasNext()); } @@ -34,10 +34,10 @@ } public void testFewSegs() { - NewSurtTokenizer tok = new 
NewSurtTokenizer("(org,archive,www"); + NewSurtTokenizer tok = new NewSurtTokenizer("http://(org,archive,www"); Iterator<String> it = tok.iterator(); - assertEquals("(", it.next()); + assertEquals("http://(", it.next()); assertEquals("org,", it.next()); assertEquals("archive,", it.next()); assertEquals("www", it.next()); @@ -68,10 +68,10 @@ } public void testPage() { - NewSurtTokenizer tok = new NewSurtTokenizer("(org,archive,www,)/about.html"); + NewSurtTokenizer tok = new NewSurtTokenizer("http://(org,archive,www,)/about.html"); Iterator<String> it = tok.iterator(); - assertEquals("(", it.next()); + assertEquals("http://(", it.next()); assertEquals("org,", it.next()); assertEquals("archive,", it.next()); assertEquals("www,", it.next()); @@ -145,10 +145,10 @@ } public void testQueryAnchor() { - NewSurtTokenizer tok = new NewSurtTokenizer("(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12#423"); + NewSurtTokenizer tok = new NewSurtTokenizer("ftp://(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12#423"); Iterator<String> it = tok.iterator(); - assertEquals("(", it.next()); + assertEquals("ftp://(", it.next()); assertEquals("org,", it.next()); assertEquals("archive,", it.next()); assertEquals("www,", it.next()); @@ -185,4 +185,19 @@ } + public void testSearchList() { + NewSurtTokenizer tok = new NewSurtTokenizer("(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12#423"); + Iterator<String> it = tok.getSearchList().iterator(); + + assertEquals("(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12#423", it.next()); + assertEquals("(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12", it.next()); + assertEquals("(org,archive,www,)/fishes/pinky.html", it.next()); + assertEquals("(org,archive,www,)/fishes/", it.next()); + assertEquals("(org,archive,www,)/", it.next()); + assertEquals("(org,archive,www,", it.next()); + assertEquals("(org,archive,", it.next()); + assertEquals("(org,", it.next()); + assertEquals("(", it.next()); + assertFalse(it.hasNext()); + } } Deleted: 
trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java 2008-03-04 04:41:20 UTC (rev 2213) +++ trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java 2008-03-04 04:45:10 UTC (rev 2214) @@ -1,203 +0,0 @@ -/* SURTTokenizerTest - * - * $Id: SURTTokenizerTest.java 5056 2007-04-11 17:15:33Z paul_jack $ - * - * Created on 3:40:18 PM May 11, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.surt; - -import junit.framework.TestCase; - -import org.apache.commons.httpclient.URIException; -import org.archive.surt.SURTTokenizer2; - -/** - * - * - * @author brad - * @version $Date: 2007-04-11 10:15:33 -0700 (Wed, 11 Apr 2007) $, $Revision: 5056 $ - */ -public class SURTTokenizer2Test extends TestCase { - - SURTTokenizer2 tok; - /** - * Test method for 'org.archive.wayback.accesscontrol.SURTTokenizer.nextSearch()' - */ - public void testSimple() { - tok = toSurtT("http://www.archive.org/foo"); - assertEquals("(org,archive,www,)/foo\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testSimpleSurt() { - tok = SURTTokenizer2.newFromSURT("http://(org,archive,www,)/foo"); - assertEquals("(org,archive,www,)/foo\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testSlashPath() { - tok = toSurtT("http://www.archive.org/"); - assertEquals("(org,archive,www,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } 
- - /** test */ - public void testEmptyPath() { - tok = toSurtT("http://www.archive.org"); - assertEquals("(org,archive,www,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - - - /** test */ - public void testEmptyPathMore() { - tok = toSurtT("http://brad.www.archive.org"); - assertEquals("(org,archive,www,brad,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,brad,)/",tok.nextSearch()); - assertEquals("(org,archive,www,brad,",tok.nextSearch()); - assertEquals("(org,archive,www,brad",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLongPathMore() { - tok = toSurtT("http://brad.www.archive.org/one/two"); - assertEquals("(org,archive,www,brad,)/one/two\t",tok.nextSearch()); - assertEquals("(org,archive,www,brad,)/one/two",tok.nextSearch()); - assertEquals("(org,archive,www,brad,)/one",tok.nextSearch()); - assertEquals("(org,archive,www,brad,",tok.nextSearch()); - assertEquals("(org,archive,www,brad",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testShortPathHash() { - tok = toSurtT("http://www.archive.org/one/two#hash"); - assertEquals("(org,archive,www,)/one/two\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/one/two",tok.nextSearch()); - assertEquals("(org,archive,www,)/one",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - 
assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testCGI1() { - tok = toSurtT("http://www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testPort() { - tok = toSurtT("http://www.archive.org:8080/cgi?foobar"); - assertEquals("(org,archive,www,:8080)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,:8080)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,:8080)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,:8080",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLogin() { - tok = toSurtT("http://brad@www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,@brad)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,@brad)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,@brad)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,@brad",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLoginPass() { - tok = toSurtT("http://brad:pass@www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar",tok.nextSearch()); - 
assertEquals("(org,archive,www,@brad:pass)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } -// /** test */ - // leave this guy out for now: was a bug in Heritrix thus archive-commons - // wait for new jar... -// public void testLoginPassPort() { -// tok = toSurtT("http://brad:pass@www.archive.org:8080/cgi?foobar"); -// assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar\t",tok.nextSearch()); -// assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar",tok.nextSearch()); -// assertEquals("(org,archive,www,:8080@brad:pass)/cgi",tok.nextSearch()); -// assertEquals("(org,archive,www,:8080@brad:pass",tok.nextSearch()); -// assertEquals("(org,archive,www,:8080",tok.nextSearch()); -// assertEquals("(org,archive,www",tok.nextSearch()); -// assertEquals("(org,archive",tok.nextSearch()); -// assertEquals("(org",tok.nextSearch()); -// assertNull(tok.nextSearch()); -// } -// - - private SURTTokenizer2 toSurtT(final String u) { - SURTTokenizer2 tok = null; - try { - tok = new SURTTokenizer2(u); - } catch (URIException e) { - e.printStackTrace(); - assertFalse("URL Exception " + e.getLocalizedMessage(),true); - } - return tok; - } - -} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |