From: <al...@us...> - 2008-02-27 11:12:03
|
Revision: 2206 http://archive-access.svn.sourceforge.net/archive-access/?rev=2206&view=rev Author: alexoz Date: 2008-02-27 03:12:04 -0800 (Wed, 27 Feb 2008) Log Message: ----------- Added new and improved SURT tokenizer which leaves on trailing commas. Modified Paths: -------------- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java 2008-02-27 11:10:51 UTC (rev 2205) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/accesscontrol/model/Rule.java 2008-02-27 11:12:04 UTC (rev 2206) @@ -33,6 +33,7 @@ private String privateComment; private String publicComment; private Boolean enabled; + private boolean exactMatch; public Rule() { @@ -86,6 +87,7 @@ setSurt(rule.getSurt()); setWho(rule.getWho()); setEnabled(rule.getEnabled()); + setExactMatch(rule.isExactMatch()); } /** @@ -259,18 +261,37 @@ /* * Rules are sorted in descending order of "specificity". - * So we order first by SURT, then group, then policy. + * So we order first by SURT, exact-match, + * then group, then policy. 
*/ public int compareTo(Rule o) { int i = getSurt().compareTo(o.getSurt()); if (i == 0) { - if (getWho() != null && o.getWho() == null) { + // exact matches come before non-exact + if (isExactMatch() && !o.isExactMatch()) { i = -1; + } else if (!isExactMatch() && o.isExactMatch()) { + i = 1; + + // non-null groups come before null groups + } else if (getWho() != null && o.getWho() == null) { + i = -1; } else if (getWho() == null && o.getWho() != null) { i = 1; } else { i = getPolicyId().compareTo(o.getPolicyId()); } + + + // if we're still equal try capture date start + if (i == 0 && getCaptureStart() != null) { + i = getCaptureStart().compareTo(o.getCaptureStart()); + } + + // and retrieval date + if (i == 0 && getRetrievalStart() != null) { + i = getRetrievalStart().compareTo(o.getRetrievalStart()); + } } return i; } @@ -329,4 +350,12 @@ public boolean matches(String surt, Date captureDate, Date retrievalDate, String who2) { return (who == null || who == who2) && matches(surt, captureDate, retrievalDate); } + + public boolean isExactMatch() { + return exactMatch; + } + + public void setExactMatch(boolean exactMatch) { + this.exactMatch = exactMatch; + } } Modified: trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java 2008-02-27 11:10:51 UTC (rev 2205) +++ trunk/archive-access/projects/access-control/access-control/src/main/java/org/archive/surt/NewSurtTokenizer.java 2008-02-27 11:12:04 UTC (rev 2206) @@ -28,11 +28,19 @@ private int endOfAuthority; private int endOfPath; private int surtLength; + private int preTabLength; public NewSurtTokenizer(String surt) { super(); this.surt = surt; surtLength = surt.length(); + + if (surt.charAt(surtLength - 1) == '\t') { + preTabLength = surtLength - 1; + } else { + preTabLength = 
surtLength; + } + endOfAuthority = surt.indexOf(')'); if (endOfAuthority == -1) { endOfAuthority = surtLength; @@ -61,6 +69,13 @@ } private int nextPieceEnd() { + // Special case: If the SURT ends with a tab, we treat that as an extra token. + // A trailing tab is sometimes used (for better or worse) to make a distinction between + // an exact match and prefix match. + if (pos >= preTabLength && pos < surtLength) { + return surtLength; + } + // ROOT: "(..." if (pos == 0) { return 1; // "(" @@ -69,7 +84,7 @@ if (pos < endOfAuthority || endOfAuthority == -1) { int endOfHostComponent = surt.indexOf(',', pos); if (endOfHostComponent == -1) { - return surtLength; + return preTabLength; } else { return endOfHostComponent + 1; } @@ -88,7 +103,7 @@ } else if (endOfPath != -1) { // file: "hello.html" return endOfPath; } else { - return surtLength; + return preTabLength; } } @@ -98,12 +113,12 @@ if (endOfQuery != -1) { return endOfQuery; } else { - return surtLength; + return preTabLength; } } // Anchor "#boo" - return surtLength; + return preTabLength; } public String next() { Modified: trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java 2008-02-27 11:10:51 UTC (rev 2205) +++ trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/NewSurtTokenizerTest.java 2008-02-27 11:12:04 UTC (rev 2206) @@ -161,5 +161,28 @@ } + public void testListDiffing() { + NewSurtTokenizer tok = new NewSurtTokenizer("(org,archive,www,)/fishes/"); + NewSurtTokenizer tok2 = new NewSurtTokenizer("(org,archive,www,)/fishes/pinky.html"); + assertEquals("pinky.html", tok2.toList().get(tok.toList().size())); + } + + public void testTrailingTab() { + NewSurtTokenizer tok = new 
NewSurtTokenizer("(org,archive,www,)/fishes/pinky.html?moo=yes&bar=12#423\t"); + Iterator<String> it = tok.iterator(); + + assertEquals("(", it.next()); + assertEquals("org,", it.next()); + assertEquals("archive,", it.next()); + assertEquals("www,", it.next()); + assertEquals(")/", it.next()); + assertEquals("fishes/", it.next()); + assertEquals("pinky.html", it.next()); + assertEquals("?moo=yes&bar=12", it.next()); + assertEquals("#423", it.next()); + assertEquals("\t", it.next()); + assertFalse(it.hasNext()); + + } } Modified: trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java =================================================================== --- trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java 2008-02-27 11:10:51 UTC (rev 2205) +++ trunk/archive-access/projects/access-control/access-control/src/test/java/org/archive/surt/SURTTokenizer2Test.java 2008-02-27 11:12:04 UTC (rev 2206) @@ -102,7 +102,7 @@ /** test */ public void testLongPathMore() { tok = toSurtT("http://brad.www.archive.org/one/two"); - assertEquals("(org,archive,www,brad,)/one/two\t",tok.nextSearch()); + assertEquals("( org, archive, www, brad, )/ one/ two\t",tok.nextSearch()); assertEquals("(org,archive,www,brad,)/one/two",tok.nextSearch()); assertEquals("(org,archive,www,brad,)/one",tok.nextSearch()); assertEquals("(org,archive,www,brad,",tok.nextSearch()); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |