From: <bra...@us...> - 2007-04-04 22:16:33
|
Revision: 1693 http://archive-access.svn.sourceforge.net/archive-access/?rev=1693&view=rev Author: bradtofel Date: 2007-04-04 15:15:47 -0700 (Wed, 04 Apr 2007) Log Message: ----------- moving test cases to tests subdir Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/FileDownloaderTest.java Copied: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/FileDownloaderTest.java (from rev 1692, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/FileDownloaderTest.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/FileDownloaderTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/FileDownloaderTest.java 2007-04-04 22:15:47 UTC (rev 1693) @@ -0,0 +1,69 @@ +/* FileDownloaderTest + * + * $Id$ + * + * Created on 3:46:13 PM Jan 25, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-svn. + * + * wayback-svn is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback-svn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.util; + +//import java.io.File; +//import java.net.URL; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class FileDownloaderTest extends TestCase { + /** + * @throws Exception + */ + public void testDownload() throws Exception { +// URL url = new URL("http://homeserver.us.archive.org/~brad/tmp.del.gz"); +// String wantHex = "01051ca0aabef856e9bdcee4ac23f66f"; +// File tmp = File.createTempFile("tmp","del"); +// FileDownloader downloader = new FileDownloader(); +// downloader.setDigest(true); +// downloader.download(url,tmp); +// assertTrue(tmp.exists()); +// assertEquals(downloader.getLastDigest(),wantHex); +// assertTrue(tmp.delete()); + } + + /** + * @throws Exception + */ + public void testDownloadGZ() throws Exception { +// URL url = new URL("http://homeserver.us.archive.org/~brad/tmp.del.gz"); +// String wantHex = "765dcbfb102670a6e75859599cb38fe4"; +// File tmp = File.createTempFile("tmp","del"); +// FileDownloader downloader = new FileDownloader(); +// downloader.setDigest(true); +// downloader.downloadGZ(url,tmp); +// assertTrue(tmp.exists()); +// assertEquals(downloader.getLastDigest(),wantHex); +// assertTrue(tmp.delete()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-04-04 22:26:30
|
Revision: 1696 http://archive-access.svn.sourceforge.net/archive-access/?rev=1696&view=rev Author: bradtofel Date: 2007-04-04 15:26:32 -0700 (Wed, 04 Apr 2007) Log Message: ----------- moved remaining *Test.java to src/test/java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/core/TimestampTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/surt/SURTTokenizerTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/CachedFileTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/UrlCanonicalizerTest.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagixTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/TimestampTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/surt/SURTTokenizerTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/CachedFileTest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizerTest.java Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,85 +0,0 @@ -/* RobotExclusionFilterTest - * - * $Id$ - * - * Created on 2:55:58 PM Mar 21, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.accesscontrol.robotstxt; - -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class RobotExclusionFilterTest extends TestCase { - - /** - * - */ - public void testFoo() { - String re = "^www[0-9]+\\."; - Pattern p = Pattern.compile(re); - String url = "www4.archive.org"; - Matcher m = p.matcher(url); - assertTrue(m.find()); - - } - - /** - * - */ - public void testSearchResultToRobotUrlStrings() { - RobotExclusionFilter f = new RobotExclusionFilter(null,"",100); - String test1[] = {"www.foo.com","foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("www.foo.com"),test1); - - String test2[] = {"foo.com","www.foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("foo.com"),test2); - - String test3[] = {"fool.foo.com","www.fool.foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("fool.foo.com"),test3); - - String test4[] = {"www4.foo.com","www.foo.com","foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("www4.foo.com"),test4); - - String test5[] = {"www4w.foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("www4w.foo.com"),test5); - - String test6[] = {"www.www.foo.com","www.foo.com"}; - compareListTo(f.searchResultToRobotUrlStrings("www.www.foo.com"),test6); - } - - private void compareListTo(List list, String strings[]) { - assertEquals(list.size(), strings.length); - for(int i = 0; i < strings.length; i++) { - String listS = (String) list.get(i); - String arrayS = "http://" + strings[i] + "/robots.txt"; - assertEquals(listS, arrayS); - } - } -} Deleted: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,233 +0,0 @@ -/* TagMagixTest - * - * $Id$ - * - * Created on 6:36:07 PM Feb 14, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.util.Properties; - -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.SearchResult; -import org.archive.wayback.exception.ConfigurationException; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class TagMagixTest extends TestCase { - - /** - * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' - */ - public void testMarkupTag() { - - - // simple simple -- no quotes at all - checkMarkup( - "<A HREF=http://goofy.com/>", - "<A HREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // same test with lower case - checkMarkup( - "<a href=http://goofy.com/>", - "<a href=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with funky mixed case - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // more funky mixed case, this time in the attribute to replace argument - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // another funky case permutation, this time in the tagname to replace - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - 
"a","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with double quotes - checkMarkup( - "<A HREF=\"http://goofy.com/\">", - "<A HREF=\"http://web.archive.org/wayback/2004/http://goofy.com/\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // single quotes - checkMarkup( - "<A HREF='http://goofy.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags - checkMarkup( - "<A HREF='http://goofy.com/'><A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags with newline: - checkMarkup( - "<A HREF='http://goofy.com/'>\n<A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>\n<A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - - // two tags in "page" but only asking to update one of them - checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><B HREF='http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags, asking to update the other. 
- checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://goofy.com/'><B HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "B","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple path relative - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple server relative but irrelavant -- still at top level - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // server relative but with non directory base url - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // server relative being significant - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // path relative with non-directory base url - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // path relative in subdirectory - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/dir/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no closing apos) - checkMarkup( - "<A HREF='index.html>", - "<A HREF='index.html>", - 
"A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF='index.html\">", - "<A HREF='index.html\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // same as last, but reversed: don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF=\"index.html'>", - "<A HREF=\"index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newline in attribute - checkMarkup( - "<A HREF='/index.html'\n FOO='bar'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'\n FOO='bar'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute - checkMarkup( - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\">", - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\">", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case, no protocol - 
checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","archive.org/dir/"); - - // Javascript escaped quote attribute: - checkMarkup( - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"/css/print.css\\\" />\");", - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"http://web.archive.org/wayback/2004/http://boogle.org/css/print.css\\\" />\");", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://boogle.org/dir/"); - - - } - - - private void checkMarkup(String orig, String want, String tag, String attr, String prefix, String ts, String url) { - StringBuilder buf = new StringBuilder(orig); - SearchResult result = new SearchResult(); - result.put(WaybackConstants.RESULT_CAPTURE_DATE,ts); - if(url.startsWith("http://")) { - url = url.substring(7); - } - result.put(WaybackConstants.RESULT_URL,url); - ArchivalUrlResultURIConverter uriC = new ArchivalUrlResultURIConverter(); - Properties initp = new Properties(); - initp.put("replayuriprefix",prefix); - try { - uriC.init(initp); - } catch (ConfigurationException e) { - e.printStackTrace(); - assertTrue("failed initialization of uriCovnerter " + e.getMessage(), - false); - } - - TagMagix.markupTagREURIC(buf,uriC,result,url,tag,attr); - String marked = buf.toString(); - assertEquals(want,marked); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/TimestampTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/TimestampTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/TimestampTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,89 +0,0 @@ -/* TimestampTest - * - * $Id$ - * - * Created on 6:44:30 PM Jan 11, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.core; - -import junit.framework.TestCase; - -import org.archive.wayback.core.Timestamp; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class TimestampTest extends TestCase { - /** - * run several padding tests - */ - public void testPadDateStr() { - - assertEquals("padStart '1'","19960101000000",Timestamp.padStartDateStr("1")); - assertEquals("padEnd '1'","19991231235959",Timestamp.padEndDateStr("1")); - assertEquals("padStart '2'","20000101000000",Timestamp.padStartDateStr("2")); - assertEquals("padEnd","20071231235959",Timestamp.padEndDateStr("2")); - assertEquals("padEnd","20071231235959",Timestamp.padEndDateStr("3")); - assertEquals("padEnd","20061231235959",Timestamp.padEndDateStr("2006")); - assertEquals("padEnd","20061231235959",Timestamp.padEndDateStr("200613")); - assertEquals("padEnd","20071231235959",Timestamp.padEndDateStr("2007")); - - - // day of month stuff: - 
assertEquals("padEnd","20060131235959",Timestamp.padEndDateStr("200601")); - assertEquals("padEnd","20060228235959",Timestamp.padEndDateStr("200602")); - assertEquals("padEnd","20060331235959",Timestamp.padEndDateStr("200603")); - assertEquals("padEnd","20060430235959",Timestamp.padEndDateStr("200604")); - assertEquals("padEnd","20060430235959",Timestamp.padEndDateStr("2006044")); - - assertEquals("padEnd","20050228235959",Timestamp.padEndDateStr("200502")); - assertEquals("padEnd","20040229235959",Timestamp.padEndDateStr("200402")); - assertEquals("padEnd","20030228235959",Timestamp.padEndDateStr("200302")); - - assertEquals("padEnd","19960229235959",Timestamp.padEndDateStr("199602")); - assertEquals("padStart","19960201000000",Timestamp.padStartDateStr("199602")); - - assertEquals("padStart","19960101000000",Timestamp.padStartDateStr("19960")); - assertEquals("padEnd","19960930235959",Timestamp.padEndDateStr("19960")); - - assertEquals("padStart","19961001000000",Timestamp.padStartDateStr("19961")); - assertEquals("padEnd","19961231235959",Timestamp.padEndDateStr("19961")); - - assertEquals("padStart","19961001000000",Timestamp.padStartDateStr("19962")); - assertEquals("padEnd","19961231235959",Timestamp.padEndDateStr("19962")); - - assertEquals("padStart","19960101000050",Timestamp.padStartDateStr("19960101000060")); - assertEquals("padEnd","19960101000050",Timestamp.padEndDateStr("19960101000060")); - - - } - /** - * - */ - public void testConstructors() { - int sse = 1147986348; - String dateSpec = "20060518210548"; - assertEquals("bad fromSSe",dateSpec,Timestamp.fromSse(sse).getDateStr()); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java 
2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,217 +0,0 @@ -/* AlphaPartitionedIndexTest - * - * $Id$ - * - * Created on 5:01:05 PM Jan 25, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-svn. - * - * wayback-svn is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-svn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourceindex.distributed; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Properties; - -import org.apache.commons.httpclient.URIException; -import org.archive.wayback.WaybackConstants; -import org.archive.wayback.core.WaybackRequest; -import org.archive.wayback.exception.BadQueryException; -import org.archive.wayback.exception.ResourceIndexNotAvailableException; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class AlphaPartitionedIndexTest extends TestCase { - - private File rangeMapFile; - private AlphaPartitionedIndex index = null; - - /* - * @see TestCase#setUp() - */ - protected void setUp() throws Exception { - super.setUp(); - createRangeMapFile(); - index = new AlphaPartitionedIndex(); - Properties p = new 
Properties(); - p.put(AlphaPartitionedIndex.RANGE_MAP_PATH,rangeMapFile.getAbsolutePath()); - p.put(AlphaPartitionedIndex.RANGE_CHECK_INTERVAL,"1000"); - index.init(p); - } - - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - rangeMapFile.delete(); - } - - /** - * @throws Exception - */ - public void testFindRange() throws Exception { - testFindRange(index,"bam.com/","b"); - testFindRange(index,"banana.com/","c"); - testFindRange(index,"banana.net/","c"); - testFindRange(index,"banana.au/","b"); - testFindRange(index,"ape.com/","a"); - testFindRange(index,"apple.com/","b"); - testFindRange(index,"aardvark.com/","a"); - testFindRange(index,"dantheman.com/","d"); - testFindRange(index,"cool.com/","c"); - testFindRange(index,"cups.com/","d"); - testFindRange(index,"zoo.com/","d"); - testFindRange(index,"207.241.2.2/","a"); - testFindRange(index,"zztop.com/","d"); - } - - /** - * @throws Exception - */ - public void testGroupBalance() throws Exception { - WaybackRequest r = new WaybackRequest(); - r.put(WaybackConstants.REQUEST_URL,index.canonicalize("apple.com/")); - RangeGroup g = index.getRangeGroupForRequest(r); - assertEquals(g.getName(),"b"); - RangeMember b1 = g.findBestMember(); - assertEquals(b1.getUrlBase(),"b1"); - b1.noteConnectionStart(); - // b1 => 1 - // b2 => 0 - RangeMember b2 = g.findBestMember(); - assertEquals(b2.getUrlBase(),"b2"); - b2.noteConnectionStart(); - // b1 => 1 - // b2 => 1 - b1.noteConnectionStart(); - // b1 => 2 - // b2 => 1 - RangeMember b2_2 = g.findBestMember(); - assertEquals(b2_2.getUrlBase(),"b2"); - b1.noteConnectionSuccess(); - // b1 => 1 - // b2 => 1 - RangeMember b1_2 = g.findBestMember(); - assertEquals(b1_2.getUrlBase(),"b1"); - b1.noteConnectionStart(); - // b1 => 2 - // b2 => 1 - RangeMember b2_3 = g.findBestMember(); - assertEquals(b2_3.getUrlBase(),"b2"); - b2_3.noteConnectionStart(); - // b1 => 2 - // b2 => 2 - b1_2.noteConnectionSuccess(); - // b1 => 1 - // b2 
=> 2 - RangeMember b1_3 = g.findBestMember(); - assertEquals(b1_3.getUrlBase(),"b1"); - b1_3.noteConnectionStart(); - // b1 => 2 - // b2 => 2 - RangeMember b1_4 = g.findBestMember(); - assertEquals(b1_4.getUrlBase(),"b1"); - b1_4.noteConnectionStart(); - // b1 => 3 - // b2 => 2 - b2_3.noteConnectionSuccess(); - // b1 => 3 - // b2 => 1 - assertEquals(g.findBestMember().getUrlBase(),"b2"); - g.findBestMember().noteConnectionStart(); - // b1 => 3 - // b2 => 2 - assertEquals(g.findBestMember().getUrlBase(),"b2"); - assertEquals(g.findBestMember().getUrlBase(),"b2"); - g.findBestMember().noteConnectionStart(); - // b1 => 3 - // b2 => 3 - assertEquals(g.findBestMember().getUrlBase(),"b1"); - b1.noteConnectionSuccess(); - // b1 => 2 - // b2 => 3 - assertEquals(g.findBestMember().getUrlBase(),"b1"); - b1.noteConnectionFailure(); - // b1 => 1-X - // b2 => 3 - assertEquals(g.findBestMember().getUrlBase(),"b2"); - b2.noteConnectionStart(); - // b1 => 1-X - // b2 => 4 - assertEquals(g.findBestMember().getUrlBase(),"b2"); - b2.noteConnectionStart(); - // b1 => 1-X - // b2 => 5 - - // HACKHACK: how to sleep for 1 ms? 
- long one = System.currentTimeMillis(); - int two = 0; - while(System.currentTimeMillis() <= one) { - two++; - } - - b1.noteConnectionSuccess(); - // b1 => 0 - // b2 => 5 - assertEquals(g.findBestMember().getUrlBase(),"b1"); - b1.noteConnectionStart(); - // b1 => 1 - // b2 => 5 - b1.noteConnectionStart(); - b1.noteConnectionStart(); - b1.noteConnectionStart(); - b1.noteConnectionStart(); - b1.noteConnectionStart(); - // b1 => 6 - // b2 => 5 - assertEquals(g.findBestMember().getUrlBase(),"b2"); - b2.noteConnectionStart(); - // b1 => 6 - // b2 => 6 - assertEquals(g.findBestMember().getUrlBase(),"b1"); - } - - private void testFindRange(final AlphaPartitionedIndex apIndex, - final String url, final String wantGroup) throws URIException, - BadQueryException, ResourceIndexNotAvailableException { - WaybackRequest r = new WaybackRequest(); - r.put(WaybackConstants.REQUEST_URL,apIndex.canonicalize(url)); - RangeGroup g = apIndex.getRangeGroupForRequest(r); - assertEquals(g.getName(),wantGroup); - } - - private void createRangeMapFile() throws IOException { - rangeMapFile = File.createTempFile("range-map","tmp"); - FileWriter writer = new FileWriter(rangeMapFile); - StringBuilder sb = new StringBuilder(); - sb.append("d cups.com/ zorro.com/ d1 d2\n"); - sb.append("b apple.com/ banana.com/ b1 b2\n"); - sb.append("a apple.com/ a1 a2\n"); - sb.append("c banana.com/ cups.com/ c1 c2\n"); - writer.write(sb.toString()); - writer.close(); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2007-04-04 22:26:32 UTC (rev 
1696) @@ -1,95 +0,0 @@ -/* FileLocationDBLogTest - * - * $Id$ - * - * Created on 4:54:04 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.File; -import java.util.Iterator; -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBLogTest extends TestCase { - FileLocationDBLog log; - protected void setUp() throws Exception { - super.setUp(); - File tmp = File.createTempFile("fldb","log"); - - log = new FileLocationDBLog(tmp.getAbsolutePath()); - } - - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - if(!log.delete()) { - fail("FAILED to delete tmp file"); - } - } - - /** - * @throws Exception - */ - public void testEmptyFile() throws Exception { - String newArc1 = "foo.arc.gz"; - String newArc2 = "bar.arc.gz"; - long mark1 = log.getCurrentMark(); - assertEquals(mark1,0); - Iterator itr = log.getArcsBetweenMarks(0,0); - assertFalse(itr.hasNext()); - log.addArc(newArc1); - long mark2 = log.getCurrentMark(); - assertEquals(newArc1.length() + 1,mark2); - itr = log.getArcsBetweenMarks(mark1,mark2); - assertTrue(itr.hasNext()); - String gotArc 
= (String) itr.next(); - assertFalse(itr.hasNext()); - assertTrue(newArc1.equals(gotArc)); - log.addArc(newArc2); - long mark3 = log.getCurrentMark(); - assertEquals(newArc1.length() + newArc2.length() + 2, mark3); - - itr = log.getArcsBetweenMarks(mark2,mark3); - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertFalse(itr.hasNext()); - assertTrue(newArc2.equals(gotArc)); - - itr = log.getArcsBetweenMarks(mark1,mark3); - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertTrue(newArc1.equals(gotArc)); - - assertTrue(itr.hasNext()); - gotArc = (String) itr.next(); - assertTrue(newArc2.equals(gotArc)); - - assertFalse(itr.hasNext()); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,201 +0,0 @@ -/* FileLocationDBTest - * - * $Id$ - * - * Created on 5:17:23 PM Aug 21, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.resourcestore.http; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.Properties; - -import org.archive.wayback.resourcestore.http.FileLocationDB; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class FileLocationDBTest extends TestCase { - private FileLocationDB db = null; - private String dbPath = null; - private String dbName = null; - private File tmpFile = null; - private File tmpLogFile = null; - - /* - * @see TestCase#setUp() - */ - protected void setUp() throws Exception { - - tmpFile = File.createTempFile("test-FileLocationDB",".tmp"); - tmpLogFile = File.createTempFile("test-FileLocationDB",".log"); - assertTrue(tmpFile.delete()); - assertTrue(tmpFile.mkdirs()); - dbPath = tmpFile.getAbsolutePath(); - dbName = "test-FileLocationDB"; - db = new FileLocationDB(); - Properties p = new Properties(); - p.setProperty(FileLocationDB.ARC_DB_PATH,dbPath); - p.setProperty(FileLocationDB.ARC_DB_NAME,dbName); - p.setProperty(FileLocationDB.ARC_DB_LOG,tmpLogFile.getAbsolutePath()); - - db.init(p); - - super.setUp(); - } - - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - db.shutdownDB(); - if(tmpFile.isDirectory()) { - File files[] = tmpFile.listFiles(); - for(int i = 0; i < files.length; i++) { - assertTrue(files[i].isFile()); - assertTrue(files[i].delete()); - } - assertTrue(tmpFile.delete()); - } - assertTrue(tmpLogFile.delete()); - } - - private void testMarkLength(long start, long end, int count) throws IOException { - Iterator itr = db.getArcsBetweenMarks(start,end); - int found = 0; - while(itr.hasNext()) { - itr.next(); - found++; - } - 
assertEquals(count,found); - } - - /** - * Test method for 'org.archive.wayback.http11resourcestore.FileLocationDB - */ - public void testDB() { - assertNotNull(db); - String urls[] = null; - try { - // empty results OK: - urls = db.arcToUrls("arc1"); - assertNull(urls); - //assertEquals(urls.length,0); - testMarkLength(0,0,0); - - // add an URL, and get it back: - db.addArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url1",urls[0]); - testMarkLength(0,5,1); - - // add the same URL again, verify only comes back once: - db.addArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url1",urls[0]); - testMarkLength(0,5,1); - - // check for empty results for a diff arc: - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // add a diff URL for first arc, verify both come back: - db.addArcUrl("arc1","url2"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(2,urls.length); - assertEquals("url1",urls[0]); - assertEquals("url2",urls[1]); - testMarkLength(0,5,1); - - // still nothing for arc2: - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // add an URL for arc2, and get it back: - db.addArcUrl("arc2","url2-1"); - urls = db.arcToUrls("arc2"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2-1",urls[0]); - testMarkLength(0,10,2); - testMarkLength(5,10,1); - - // remove unknown URL for arc2 - db.removeArcUrl("arc2","url2-2"); - urls = db.arcToUrls("arc2"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2-1",urls[0]); - - // remove the right URL for arc2 - db.removeArcUrl("arc2","url2-1"); - urls = db.arcToUrls("arc2"); - assertNull(urls); - //assertEquals(urls.length,0); - - // remove non-existant URL for first arc, verify two still come back - db.removeArcUrl("arc1","url-non"); - urls = 
db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(2,urls.length); - assertEquals("url1",urls[0]); - assertEquals("url2",urls[1]); - - // remove a right URL for arc1 - db.removeArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2",urls[0]); - - // remove a now wrong URL for arc1 - db.removeArcUrl("arc1","url1"); - urls = db.arcToUrls("arc1"); - assertNotNull(urls); - assertEquals(1,urls.length); - assertEquals("url2",urls[0]); - - // remove a last URL for arc1 - db.removeArcUrl("arc1","url2"); - urls = db.arcToUrls("arc1"); - assertNull(urls); - //assertEquals(urls.length,0); - - } catch (Exception e) { - fail("arcToUrls threw " + e.getMessage()); - } - - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/surt/SURTTokenizerTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/surt/SURTTokenizerTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/surt/SURTTokenizerTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,228 +0,0 @@ -/* SURTTokenizerTest - * - * $Id$ - * - * Created on 3:40:18 PM May 11, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.surt; - -import junit.framework.TestCase; - -import org.apache.commons.httpclient.URIException; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class SURTTokenizerTest extends TestCase { - - SURTTokenizer tok; - /** - * Test method for 'org.archive.wayback.accesscontrol.SURTTokenizer.nextSearch()' - */ - public void testSimple() { - tok = toSurtT("http://www.archive.org/foo"); - assertEquals("(org,archive,www,)/foo\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testSlashPathTrailingSlash() { - tok = toSurtT("http://www.archive.org/foo/"); - assertEquals("(org,archive,www,)/foo/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo/",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testSlashPathTwoDirs() { - tok = toSurtT("http://www.archive.org/foo/bar"); - assertEquals("(org,archive,www,)/foo/bar\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo/bar",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - 
assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testSlashPathTwoDirsTrailingSlash() { - tok = toSurtT("http://www.archive.org/foo/bar/"); - assertEquals("(org,archive,www,)/foo/bar/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo/bar/",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo/bar",tok.nextSearch()); - assertEquals("(org,archive,www,)/foo",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - - /** test */ - public void testSlashPath() { - tok = toSurtT("http://www.archive.org/"); - assertEquals("(org,archive,www,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - - /** test */ - public void testEmptyPath() { - tok = toSurtT("http://www.archive.org"); - assertEquals("(org,archive,www,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - - - /** test */ - public void testEmptyPathMore() { - tok = toSurtT("http://brad.www.archive.org"); - assertEquals("(org,archive,www,brad,)/\t",tok.nextSearch()); - assertEquals("(org,archive,www,brad,)/",tok.nextSearch()); - assertEquals("(org,archive,www,brad",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testShortPathHash() { - tok = 
toSurtT("http://www.archive.org/one/two#hash"); - assertEquals("(org,archive,www,)/one/two\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/one/two",tok.nextSearch()); - assertEquals("(org,archive,www,)/one",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testCGI1() { - tok = toSurtT("http://www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,)/",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testPort() { - tok = toSurtT("http://www.archive.org:8080/cgi?foobar"); - assertEquals("(org,archive,www,:8080)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,:8080)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,:8080)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,:8080)/",tok.nextSearch()); - assertEquals("(org,archive,www,:8080",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLogin() { - tok = toSurtT("http://brad@www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,@brad)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,@brad)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,@brad)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,@brad)/",tok.nextSearch()); - 
assertEquals("(org,archive,www,@brad",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLoginPass() { - tok = toSurtT("http://brad:pass@www.archive.org/cgi?foobar"); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testPreSURTedLoginPass() { - tok = toSurtT("(org,archive,www,@brad:pass)/cgi?foobar"); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass)/",tok.nextSearch()); - assertEquals("(org,archive,www,@brad:pass",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - /** test */ - public void testLoginPassPort() { - tok = toSurtT("http://brad:pass@www.archive.org:8080/cgi?foobar"); - assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar\t",tok.nextSearch()); - assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar",tok.nextSearch()); - assertEquals("(org,archive,www,:8080@brad:pass)/cgi",tok.nextSearch()); - assertEquals("(org,archive,www,:8080@brad:pass)/",tok.nextSearch()); - 
assertEquals("(org,archive,www,:8080@brad:pass",tok.nextSearch()); - assertEquals("(org,archive,www,:8080",tok.nextSearch()); - assertEquals("(org,archive,www",tok.nextSearch()); - assertEquals("(org,archive",tok.nextSearch()); - assertEquals("(org",tok.nextSearch()); - assertNull(tok.nextSearch()); - } - - - private SURTTokenizer toSurtT(final String u) { - SURTTokenizer tok = null; - try { - tok = new SURTTokenizer(u); - } catch (URIException e) { - e.printStackTrace(); - assertFalse("URL Exception " + e.getLocalizedMessage(),true); - } - return tok; - } - -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/CachedFileTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/CachedFileTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/CachedFileTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,62 +0,0 @@ -/* CachedFileTest - * - * $Id$ - * - * Created on 3:44:24 PM Jan 25, 2007. - * - * Copyright (C) 2007 Internet Archive. - * - * This file is part of wayback-svn. - * - * wayback-svn is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback-svn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. 
- * - * You should have received a copy of the GNU Lesser Public License - * along with wayback-svn; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.util; - -//import java.io.File; -//import java.net.URL; -//import java.util.Iterator; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class CachedFileTest extends TestCase { - /** - * @throws Exception - */ - public void testCachedFile() throws Exception { -// File file = File.createTempFile("tmp","del"); -// URL url = new URL("http://homeserver.us.archive.org/~brad/tmp.del"); -// long checkMS = 10; -// CachedFile cf = new CachedFile(file,url,checkMS); -// Iterator itr = cf.getSequentialIterator(); -// assertTrue(itr.hasNext()); -// assertEquals("a one two three",(String)itr.next()); -// assertTrue(itr.hasNext()); -// assertEquals("b four five size",(String)itr.next()); -// assertTrue(itr.hasNext()); -// assertEquals("c seven eight",(String)itr.next()); -// assertTrue(itr.hasNext()); -// assertEquals("d nine ten",(String)itr.next()); -// assertTrue(itr.hasNext()); -// assertEquals("e eleven twelve thirteen fourteen fifteen",(String)itr.next()); -// assertFalse(itr.hasNext()); -// file.delete(); - } -} Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizerTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizerTest.java 2007-04-04 22:22:24 UTC (rev 1695) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizerTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -1,199 +0,0 @@ -/* UrlCanonicalizerTest - * - * $Id$ - * - * Created on 2:13:36 PM Oct 11, 2006. - * - * Copyright (C) 2006 Internet Archive. 
- * - * This file is part of Wayback. - * - * Wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * Wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with Wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.util; - -import org.apache.commons.httpclient.URIException; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class UrlCanonicalizerTest extends TestCase { - private UrlCanonicalizer canonicalizer = new UrlCanonicalizer(); - /** - * Test method for 'org.archive.wayback.cdx.CDXRecord.urlStringToKey(String)' - */ - public void testUrlStringToKey() { - - checkAuthority("foo.com",true); - checkAuthority("foo.con",false); - checkAuthority("foo.de",true); - checkAuthority("foo.denny",false); - checkAuthority("1.1.1.1",true); - checkAuthority("23.4.4.foo",false); - checkAuthority("23.4.4.com",true); - checkAuthority("com.23.4.4.134",false); - - - // simple strip of http:// - checkCanonicalization("http://foo.com/","foo.com/"); - -// would be nice to handle other protocols... -// // simple strip of https:// -// checkCanonicalization("https://foo.com/","foo.com/"); -// -// // simple strip of ftp:// -// checkCanonicalization("ftp://foo.com/","foo.com/"); -// -// // simple strip of rtsp:// -// checkCanonicalization("rtsp://foo.com/","foo.com/"); - - // strip leading 'www.' 
- checkCanonicalization("http://www.foo.com/","foo.com/"); - - // add trailing '/' with empty path - checkCanonicalization("http://www.foo.com","foo.com/"); - - // strip leading 'www##.' - checkCanonicalization("http://www12.foo.com/","foo.com/"); - - // strip leading 'www##.' with no protocol - checkCanonicalization("www12.foo.com/","foo.com/"); - - - // leave alone an url with no protocol but non-empty path - checkCanonicalization("foo.com/","foo.com/"); - - // add trailing '/' with empty path and without protocol - checkCanonicalization("foo.com","foo.com/"); - - // add trailing '/' to with empty path and no protocol, plus massage - checkCanonicalization("www12.foo.com","foo.com/"); - - // do not add trailing '/' non-empty path and without protocol - checkCanonicalization("foo.com/boo","foo.com/boo"); - - // replace escaped ' ' with '+' in path - checkCanonicalization("foo.com/pa%20th","foo.com/pa+th"); - - // replace escaped ' ' with '+' in path plus kill trailing slash -// checkCanonicalization("foo.com/pa%20th/","foo.com/pa+th"); - - // replace multiple consecutive /'s in path - checkCanonicalization("foo.com//goo","foo.com/goo"); - - // replace multiple consecutive /'s in path - checkCanonicalization("foo.com///goo","foo.com/goo"); - - // leave alone consecutive /'s after ? - checkCanonicalization("foo.com/b?jar=//goo","foo.com/b?jar=//goo"); - - // replace multiple consecutive /'s in path, plus kill trailing / -// checkCanonicalization("foo.com///goo/","foo.com/goo"); - - // replace escaped ' ' with '+' in path plus keep trailing slash and query - checkCanonicalization("foo.com/pa%20th?a=b","foo.com/pa+th?a=b"); - - - // replace escaped ' ' with '+' in path but not in query key - checkCanonicalization("foo.com/pa%20th?a%20a=b","foo.com/pa+th?a%20a=b"); - - // replace escaped ' ' with '+' in path but not in query value - checkCanonicalization("foo.com/pa%20th?a=b%20b","foo.com/pa+th?a=b%20b"); - - // replace escaped ' ' with '+' in path, unescape legal '!' 
in path - // no change in query escaping - checkCanonicalization("foo.com/pa%20t%21h?a%20a=b","foo.com/pa+t!h?a%20a=b"); - - // replace escaped ' ' with '+' in path, leave illegal '%02' in path - // no change in query escaping - checkCanonicalization("foo.com/pa%20t%02h?a%20a=b","foo.com/pa+t%02h?a%20a=b"); - - // strip jsessionid - String sid1 = "jsessionid=0123456789abcdefghijklemopqrstuv"; - String sid2 = "PHPSESSID=9682993c8daa2c5497996114facdc805"; - String sid3 = "sid=9682993c8daa2c5497996114facdc805"; - String sid4 = "ASPSESSIONIDAQBSDSRT=EOHBLBDDPFCLHKPGGKLILNAM"; - String sid5 = "CFID=12412453&CFTOKEN=15501799"; - //String sid6 = "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A"; - - String fore = "http://foo.com/bar?bo=lo&"; - String aft = "&gum=yum"; - String want = "foo.com/bar?bo=lo&gum=yum"; -// String fore = "http://www.archive.org/index.html?"; -// String aft = ""; -// String want = "archive.org/index.html"; - - checkCanonicalization(fore + sid1 + aft,want); - checkCanonicalization(fore + sid2 + aft,want); - checkCanonicalization(fore + sid3 + aft,want); - checkCanonicalization(fore + sid4 + aft,want); - checkCanonicalization(fore + sid5 + aft,want); - //checkCanonicalization(fore + sid6 + aft,want); - - // Check ASP_SESSIONID2: - checkCanonicalization( - "http://legislature.mi.gov/(S(4hqa0555fwsecu455xqckv45))/mileg.aspx", - "legislature.mi.gov/mileg.aspx"); - - // Check ASP_SESSIONID2 (again): - checkCanonicalization( - "http://legislature.mi.gov/(4hqa0555fwsecu455xqckv45)/mileg.aspx", - "legislature.mi.gov/mileg.aspx"); - - // Check ASP_SESSIONID3: - checkCanonicalization( - "http://legislature.mi.gov/(a(4hqa0555fwsecu455xqckv45)S(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules", - "legislature.mi.gov/(a(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules"); - - // strip port 80 - checkCanonicalization("http://www.chub.org:80/foo","chub.org/foo"); - - // but 
not other ports... - checkCanonicalization("http://www.chub.org:8080/foo","chub.org:8080/foo"); - - } - - private void checkAuthority(String s, boolean want) { - boolean got = canonicalizer.isAuthority(s); - if(want) { - assertTrue("String("+s+") could be an Authority",want == got); - } else { - assertTrue("String("+s+") is not an Authority",want == got); - } - } - - private void checkCanonicalization(String orig, String want) { - String got; - try { - got = canonicalizer.urlStringToKey(orig); - assertEquals("Failed canonicalization (" + orig + ") => (" + got + - ") and not (" + want + ") as expected",want,got); - - String got2 = canonicalizer.urlStringToKey(got); - assertEquals("Failed 2nd canonicalization (" + got + ") => (" + - got2 + ") and not (" + want + ") as expected",want,got2); - - - } catch (URIException e) { - e.printStackTrace(); - assertTrue("Exception converting(" + orig + ")",false); - } - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java (from rev 1692, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/robotstxt/RobotExclusionFilterTest.java 2007-04-04 22:26:32 UTC (rev 1696) @@ -0,0 +1,85 @@ +/* RobotExclusionFilterTest + * + * $Id$ + * + * Created on 2:55:58 PM Mar 21, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback. 
+ * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-svn; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol.robotstxt; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class RobotExclusionFilterTest extends TestCase { + + /** + * + */ + public void testFoo() { + String re = "^www[0-9]+\\."; + Pattern p = Pattern.compile(re); + String url = "www4.archive.org"; + Matcher m = p.matcher(url); + assertTrue(m.find()); + + } + + /** + * + */ + public void testSearchResultToRobotUrlStrings() { + RobotExclusionFilter f = new RobotExclusionFilter(null,"",100); + String test1[] = {"www.foo.com","foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("www.foo.com"),test1); + + String test2[] = {"foo.com","www.foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("foo.com"),test2); + + String test3[] = {"fool.foo.com","www.fool.foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("fool.foo.com"),test3); + + String test4[] = {"www4.foo.com","www.foo.com","foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("www4.foo.com"),test4); + + String test5[] = {"www4w.foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("www4w.foo.com"),test5); + + String test6[] = 
{"www.www.foo.com","www.foo.com"}; + compareListTo(f.searchResultToRobotUrlStrings("www.www.foo.com"),test6); + } + + private void compareListTo(List list, String strings[]) { + assertEquals(list.size(), strings.length); + for(int i = 0; i < strings.length; i++) { + String listS = (String) list.get(i); + String arrayS = "http://" + strings[i] + "/robots.txt"; + assertEquals(listS, arrayS); + } + } +} Copied: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java (from rev 1692, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagixTest.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-04-04... [truncated message content] |
From: <bra...@us...> - 2007-07-25 00:17:12
|
Revision: 1856 http://archive-access.svn.sourceforge.net/archive-access/?rev=1856&view=rev Author: bradtofel Date: 2007-07-24 17:17:15 -0700 (Tue, 24 Jul 2007) Log Message: ----------- TWEAK: type safety Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2007-07-25 00:15:38 UTC (rev 1855) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDB.java 2007-07-25 00:17:15 UTC (rev 1856) @@ -184,7 +184,8 @@ * @return Iterator for traversing arcs between start and end. 
* @throws IOException */ - public CloseableIterator getArcsBetweenMarks(long start, long end) throws IOException { + public CloseableIterator<String> getArcsBetweenMarks(long start, long end) + throws IOException { return log.getArcsBetweenMarks(start, end); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java 2007-07-25 00:15:38 UTC (rev 1855) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBLog.java 2007-07-25 00:17:15 UTC (rev 1856) @@ -87,7 +87,7 @@ * @return CleanableIterator that returns all arcs between start and end * @throws IOException */ - public CloseableIterator getArcsBetweenMarks(long start, long end) + public CloseableIterator<String> getArcsBetweenMarks(long start, long end) throws IOException { RandomAccessFile raf = new RandomAccessFile(this, "r"); @@ -107,7 +107,7 @@ writer.close(); } - private class BufferedRangeIterator implements CloseableIterator { + private class BufferedRangeIterator implements CloseableIterator<String> { private RecordIterator itr; private long bytesToSend; private long bytesSent; @@ -156,7 +156,7 @@ /* (non-Javadoc) * @see java.util.Iterator#next() */ - public Object next() { + public String next() { String returnString = next; next = null; bytesSent += returnString.length() + 1; // TODO: not X-platform! 
Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java 2007-07-25 00:15:38 UTC (rev 1855) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourcestore/http/FileLocationDBServlet.java 2007-07-25 00:17:15 UTC (rev 1856) @@ -112,7 +112,7 @@ long start = Long.parseLong(getRequiredMapParam(queryMap, START_ARGUMENT)); long end = Long.parseLong(getRequiredMapParam(queryMap, END_ARGUMENT)); - Iterator itr = locationDB.getArcsBetweenMarks(start,end); + Iterator<String> itr = locationDB.getArcsBetweenMarks(start,end); StringBuilder str = new StringBuilder(); str.append("OK "); while(itr.hasNext()) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2007-07-25 00:15:38 UTC (rev 1855) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBLogTest.java 2007-07-25 00:17:15 UTC (rev 1856) @@ -61,7 +61,7 @@ String newArc2 = "bar.arc.gz"; long mark1 = log.getCurrentMark(); assertEquals(mark1,0); - Iterator itr = log.getArcsBetweenMarks(0,0); + Iterator<String> itr = log.getArcsBetweenMarks(0,0); assertFalse(itr.hasNext()); log.addArc(newArc1); long mark2 = log.getCurrentMark(); Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2007-07-25 00:15:38 UTC (rev 1855) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourcestore/http/FileLocationDBTest.java 2007-07-25 00:17:15 UTC (rev 1856) @@ -84,7 +84,7 @@ } private void testMarkLength(long start, long end, int count) throws IOException { - Iterator itr = db.getArcsBetweenMarks(start,end); + Iterator<String> itr = db.getArcsBetweenMarks(start,end); int found = 0; while(itr.hasNext()) { itr.next(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-07-26 21:53:52
|
Revision: 1895 http://archive-access.svn.sourceforge.net/archive-access/?rev=1895&view=rev Author: bradtofel Date: 2007-07-26 14:53:47 -0700 (Thu, 26 Jul 2007) Log Message: ----------- REFACTOR: moved TagMagix from org.archive.wayback.archivalurl to org.archive.wayback.replay, as it is used by several Replay Renderers. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Removed Paths: ------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/JSReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -39,6 +39,7 @@ import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.replay.BaseReplayRenderer; +import 
org.archive.wayback.replay.TagMagix; import org.archive.wayback.util.StringFormatter; import org.archive.wayback.util.UrlCanonicalizer; Deleted: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -1,286 +0,0 @@ -/* TagMagix - * - * $Id$ - * - * Created on 5:17:27 PM Feb 14, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. - * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.archive.wayback.ResultURIConverter; -import org.archive.wayback.util.UrlCanonicalizer; - -/** - * Library for updating arbitrary attributes in arbitrary tags to rewrite HTML - * documents so URI references point back into the Wayback Machine. Attempts to - * make minimal changes so nothing gets broken during this process. 
- * - * @author brad - * @version $Date$, $Revision: - * 1668 $ - */ -public class TagMagix { - - private static HashMap<String, Pattern> pcPatterns = - new HashMap<String, Pattern>(); - - private static HashMap<String, Pattern> wholeTagPatterns = - new HashMap<String, Pattern>(); - - private static HashMap<String, Pattern> attrPatterns = - new HashMap<String, Pattern>(); - - private static String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")"; - - private static String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")"; - - private static String APOSED_ATTR_VALUE = "(?:'[^'>]*')"; - - private static String RAW_ATTR_VALUE = "(?:[^ \\t\\n\\x0B\\f\\r>\"']+)"; - - private static String ANY_ATTR_VALUE = QUOTED_ATTR_VALUE + "|" - + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" - + RAW_ATTR_VALUE; - - /** - * get (and cache) a regex Pattern for locating an HTML attribute value - * within a particular tag. if found, the pattern will have the attribute - * value in group 1. Note that the attribute value may contain surrounding - * apostrophe(') or quote(") characters. - * - * @param tagName - * @param attrName - * @return Pattern to match the tag-attribute's value - */ - private synchronized static Pattern getPattern(String tagName, - String attrName) { - - String key = tagName + " " + attrName; - Pattern pc = pcPatterns.get(key); - if (pc == null) { - - String tagPatString = "<\\s*" + tagName + "\\s+[^>]*\\b" + attrName - + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; - - pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); - pcPatterns.put(key, pc); - } - return pc; - } - - /** - * get (and cache) a regex Pattern for locating an entire HTML start tag. 
- * - * @param tagName - * @return Pattern to match the tag - */ - private synchronized static Pattern getWholeTagPattern(String tagName) { - - Pattern pc = wholeTagPatterns.get(tagName); - if (pc == null) { - - String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>"; - - pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); - wholeTagPatterns.put(tagName, pc); - } - return pc; - } - - /** - * get (and cache) a regex Pattern for locating an attribute value within an - * HTML start tag. If this pattern matches, the attribute value will be in - * group(1), and will include surrounding quotes, or apos, if they were - * present in the original HTML. - * - * @param attrName - * @return Pattern to match the attributes value - */ - private synchronized static Pattern getAttrPattern(String attrName) { - - Pattern pc = attrPatterns.get(attrName); - if (pc == null) { - - String attrPatString = "\\b" + attrName + "\\s*=\\s*(" - + ANY_ATTR_VALUE + ")(?:\\s|>)?"; - - pc = Pattern.compile(attrPatString, Pattern.CASE_INSENSITIVE); - attrPatterns.put(attrName, pc); - } - return pc; - } - - /** - * Alter the HTML document in page, updating URLs in the attrName attributes - * of all tagName tags such that: - * - * 1) absolute URLs are prefixed with: wmPrefix + pageTS 2) server-relative - * URLs are prefixed with: wmPrefix + pageTS + (host of page) 3) - * path-relative URLs are prefixed with: wmPrefix + pageTS + (attribute URL - * resolved against pageUrl) - * - * @param page - * @param uriConverter - * @param captureDate - * @param baseUrl which must be absolute - * @param tagName - * @param attrName - */ - public static void markupTagREURIC(StringBuilder page, - ResultURIConverter uriConverter, String captureDate, - String baseUrl, String tagName, String attrName) { - - Pattern tagPat = getPattern(tagName, attrName); - Matcher matcher = tagPat.matcher(page); - - int idx = 0; - while (matcher.find(idx)) { - String url = matcher.group(1); - int origUrlLength = url.length(); - 
int attrStart = matcher.start(1); - int attrEnd = matcher.end(1); - String quote = ""; - if (url.charAt(0) == '"') { - quote = "\""; - url = url.substring(1, url.length() - 1); - } else if (url.charAt(0) == '\'') { - quote = "'"; - url = url.substring(1, url.length() - 1); - } else if (url.charAt(0) == '\\') { - quote = "\\\""; - url = url.substring(2, url.length() - 2); - } - String finalUrl = UrlCanonicalizer.resolveUrl(baseUrl,url); - String replayUrl = quote - + uriConverter.makeReplayURI(captureDate, finalUrl) + quote; - - int delta = replayUrl.length() - origUrlLength; - page.replace(attrStart, attrEnd, replayUrl); - idx = attrEnd + delta; - } - } - - private static String trimAttrValue(String value) { - if (value.charAt(0) == '"') { - value = value.substring(1, value.length() - 1); - } else if (value.charAt(0) == '\'') { - value = value.substring(1, value.length() - 1); - } - return value; - } - - /** - * find and return the ATTR value within a TAG tag inside the HTML document - * within the StringBuffer page. returns null if no TAG-ATTR is found. - * - * @param page - * @param tag - * @param attr - * @return URL of base-href within page, or null if none is found. - */ - public static String getTagAttr(StringBuilder page, final String tag, - final String attr) { - - String found = null; - Pattern daPattern = TagMagix.getPattern(tag, attr); - Matcher matcher = daPattern.matcher(page); - int idx = 0; - - if (matcher.find(idx)) { - found = matcher.group(1); - found = trimAttrValue(found); - } - - return found; - } - - /** - * Search through the HTML contained in page, returning the value of a - * particular attribute. This version allows matching only tags that contain - * a particular attribute-value pair, which is useful in extracting META tag - * values, for example, in returning the value of the "content" attribute in - * a META tag that also contains an attribute "http-equiv" with a value of - * "Content-Type". 
All comparision is case-insensitive, but the value - * returned is the original attribute value, as unmolested as possible. - * - * If nothing matches, returns null. - * - * - * @param page - * StringBuilding holding HTML - * @param tag - * String containing tagname of interest - * @param findAttr - * name of attribute within the tag to return - * @param whereAttr - * only match tags with an attribute whereAttr - * @param whereVal - * only match tags with whereAttr having this value - * @return the value of attribute attr in tag where the tag also contains an - * attribute whereAttr, with value whereVal, or null if nothing - * matches. - */ - public static String getTagAttrWhere(StringBuilder page, final String tag, - final String findAttr, final String whereAttr, final String whereVal) { - - Pattern tagPattern = TagMagix.getWholeTagPattern(tag); - Pattern findAttrPattern = getAttrPattern(findAttr); - Pattern whereAttrPattern = getAttrPattern(whereAttr); - Matcher tagMatcher = tagPattern.matcher(page); - - while (tagMatcher.find()) { - String wholeTag = tagMatcher.group(); - Matcher whereAttrMatcher = whereAttrPattern.matcher(wholeTag); - if (whereAttrMatcher.find()) { - String attrValue = whereAttrMatcher.group(1); - attrValue = trimAttrValue(attrValue); - if (attrValue.compareToIgnoreCase(whereVal) == 0) { - // this tag contains the right set, return the value for - // the attribute findAttr: - Matcher findAttrMatcher = findAttrPattern.matcher(wholeTag); - String value = null; - if (findAttrMatcher.find()) { - value = findAttrMatcher.group(1); - value = trimAttrValue(value); - } - return value; - } - // not the tag we want... maybe there is another: loop - } - } - - return null; - } - - /** - * find and return the href value within a BASE tag inside the HTML document - * within the StringBuffer page. returns null if no BASE-HREF is found. - * - * @param page - * @return URL of base-href within page, or null if none is found. 
- */ - public static String getBaseHref(StringBuilder page) { - return getTagAttr(page, "BASE", "HREF"); - } -} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/BaseReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -42,7 +42,6 @@ import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; -import org.archive.wayback.archivalurl.TagMagix; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.UIResults; Copied: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java (from rev 1766, trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/TagMagix.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -0,0 +1,286 @@ +/* TagMagix + * + * $Id$ + * + * Created on 5:17:27 PM Feb 14, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. 
+ * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import java.util.HashMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.archive.wayback.ResultURIConverter; +import org.archive.wayback.util.UrlCanonicalizer; + +/** + * Library for updating arbitrary attributes in arbitrary tags to rewrite HTML + * documents so URI references point back into the Wayback Machine. Attempts to + * make minimal changes so nothing gets broken during this process. + * + * @author brad + * @version $Date$, $Revision: + * 1668 $ + */ +public class TagMagix { + + private static HashMap<String, Pattern> pcPatterns = + new HashMap<String, Pattern>(); + + private static HashMap<String, Pattern> wholeTagPatterns = + new HashMap<String, Pattern>(); + + private static HashMap<String, Pattern> attrPatterns = + new HashMap<String, Pattern>(); + + private static String QUOTED_ATTR_VALUE = "(?:\"[^\">]*\")"; + + private static String ESC_QUOTED_ATTR_VALUE = "(?:\\\\\"[^>\\\\]*\\\\\")"; + + private static String APOSED_ATTR_VALUE = "(?:'[^'>]*')"; + + private static String RAW_ATTR_VALUE = "(?:[^ \\t\\n\\x0B\\f\\r>\"']+)"; + + private static String ANY_ATTR_VALUE = QUOTED_ATTR_VALUE + "|" + + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" + + RAW_ATTR_VALUE; + + /** + * get (and cache) a regex Pattern for locating an HTML attribute value + * within a particular tag. if found, the pattern will have the attribute + * value in group 1. 
Note that the attribute value may contain surrounding + * apostrophe(') or quote(") characters. + * + * @param tagName + * @param attrName + * @return Pattern to match the tag-attribute's value + */ + private synchronized static Pattern getPattern(String tagName, + String attrName) { + + String key = tagName + " " + attrName; + Pattern pc = pcPatterns.get(key); + if (pc == null) { + + String tagPatString = "<\\s*" + tagName + "\\s+[^>]*\\b" + attrName + + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + + pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); + pcPatterns.put(key, pc); + } + return pc; + } + + /** + * get (and cache) a regex Pattern for locating an entire HTML start tag. + * + * @param tagName + * @return Pattern to match the tag + */ + private synchronized static Pattern getWholeTagPattern(String tagName) { + + Pattern pc = wholeTagPatterns.get(tagName); + if (pc == null) { + + String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>"; + + pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); + wholeTagPatterns.put(tagName, pc); + } + return pc; + } + + /** + * get (and cache) a regex Pattern for locating an attribute value within an + * HTML start tag. If this pattern matches, the attribute value will be in + * group(1), and will include surrounding quotes, or apos, if they were + * present in the original HTML. 
+ * + * @param attrName + * @return Pattern to match the attributes value + */ + private synchronized static Pattern getAttrPattern(String attrName) { + + Pattern pc = attrPatterns.get(attrName); + if (pc == null) { + + String attrPatString = "\\b" + attrName + "\\s*=\\s*(" + + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + + pc = Pattern.compile(attrPatString, Pattern.CASE_INSENSITIVE); + attrPatterns.put(attrName, pc); + } + return pc; + } + + /** + * Alter the HTML document in page, updating URLs in the attrName attributes + * of all tagName tags such that: + * + * 1) absolute URLs are prefixed with: wmPrefix + pageTS 2) server-relative + * URLs are prefixed with: wmPrefix + pageTS + (host of page) 3) + * path-relative URLs are prefixed with: wmPrefix + pageTS + (attribute URL + * resolved against pageUrl) + * + * @param page + * @param uriConverter + * @param captureDate + * @param baseUrl which must be absolute + * @param tagName + * @param attrName + */ + public static void markupTagREURIC(StringBuilder page, + ResultURIConverter uriConverter, String captureDate, + String baseUrl, String tagName, String attrName) { + + Pattern tagPat = getPattern(tagName, attrName); + Matcher matcher = tagPat.matcher(page); + + int idx = 0; + while (matcher.find(idx)) { + String url = matcher.group(1); + int origUrlLength = url.length(); + int attrStart = matcher.start(1); + int attrEnd = matcher.end(1); + String quote = ""; + if (url.charAt(0) == '"') { + quote = "\""; + url = url.substring(1, url.length() - 1); + } else if (url.charAt(0) == '\'') { + quote = "'"; + url = url.substring(1, url.length() - 1); + } else if (url.charAt(0) == '\\') { + quote = "\\\""; + url = url.substring(2, url.length() - 2); + } + String finalUrl = UrlCanonicalizer.resolveUrl(baseUrl,url); + String replayUrl = quote + + uriConverter.makeReplayURI(captureDate, finalUrl) + quote; + + int delta = replayUrl.length() - origUrlLength; + page.replace(attrStart, attrEnd, replayUrl); + idx = attrEnd + delta; + } + } 
+ + private static String trimAttrValue(String value) { + if (value.charAt(0) == '"') { + value = value.substring(1, value.length() - 1); + } else if (value.charAt(0) == '\'') { + value = value.substring(1, value.length() - 1); + } + return value; + } + + /** + * find and return the ATTR value within a TAG tag inside the HTML document + * within the StringBuffer page. returns null if no TAG-ATTR is found. + * + * @param page + * @param tag + * @param attr + * @return URL of base-href within page, or null if none is found. + */ + public static String getTagAttr(StringBuilder page, final String tag, + final String attr) { + + String found = null; + Pattern daPattern = TagMagix.getPattern(tag, attr); + Matcher matcher = daPattern.matcher(page); + int idx = 0; + + if (matcher.find(idx)) { + found = matcher.group(1); + found = trimAttrValue(found); + } + + return found; + } + + /** + * Search through the HTML contained in page, returning the value of a + * particular attribute. This version allows matching only tags that contain + * a particular attribute-value pair, which is useful in extracting META tag + * values, for example, in returning the value of the "content" attribute in + * a META tag that also contains an attribute "http-equiv" with a value of + * "Content-Type". All comparision is case-insensitive, but the value + * returned is the original attribute value, as unmolested as possible. + * + * If nothing matches, returns null. + * + * + * @param page + * StringBuilding holding HTML + * @param tag + * String containing tagname of interest + * @param findAttr + * name of attribute within the tag to return + * @param whereAttr + * only match tags with an attribute whereAttr + * @param whereVal + * only match tags with whereAttr having this value + * @return the value of attribute attr in tag where the tag also contains an + * attribute whereAttr, with value whereVal, or null if nothing + * matches. 
+ */ + public static String getTagAttrWhere(StringBuilder page, final String tag, + final String findAttr, final String whereAttr, final String whereVal) { + + Pattern tagPattern = TagMagix.getWholeTagPattern(tag); + Pattern findAttrPattern = getAttrPattern(findAttr); + Pattern whereAttrPattern = getAttrPattern(whereAttr); + Matcher tagMatcher = tagPattern.matcher(page); + + while (tagMatcher.find()) { + String wholeTag = tagMatcher.group(); + Matcher whereAttrMatcher = whereAttrPattern.matcher(wholeTag); + if (whereAttrMatcher.find()) { + String attrValue = whereAttrMatcher.group(1); + attrValue = trimAttrValue(attrValue); + if (attrValue.compareToIgnoreCase(whereVal) == 0) { + // this tag contains the right set, return the value for + // the attribute findAttr: + Matcher findAttrMatcher = findAttrPattern.matcher(wholeTag); + String value = null; + if (findAttrMatcher.find()) { + value = findAttrMatcher.group(1); + value = trimAttrValue(value); + } + return value; + } + // not the tag we want... maybe there is another: loop + } + } + + return null; + } + + /** + * find and return the href value within a BASE tag inside the HTML document + * within the StringBuffer page. returns null if no BASE-HREF is found. + * + * @param page + * @return URL of base-href within page, or null if none is found. 
+ */ + public static String getBaseHref(StringBuilder page) { + return getTagAttr(page, "BASE", "HREF"); + } +} Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/timeline/TimelineReplayRenderer.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -34,11 +34,11 @@ import org.archive.wayback.ResultURIConverter; import org.archive.wayback.WaybackConstants; import org.archive.wayback.archivalurl.JSReplayRenderer; -import org.archive.wayback.archivalurl.TagMagix; import org.archive.wayback.core.Resource; import org.archive.wayback.core.SearchResult; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.replay.TagMagix; import org.archive.wayback.util.StringFormatter; /** Deleted: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-07-26 21:47:22 UTC (rev 1894) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -1,293 +0,0 @@ -/* TagMagixTest - * - * $Id$ - * - * Created on 6:36:07 PM Feb 14, 2006. - * - * Copyright (C) 2006 Internet Archive. - * - * This file is part of wayback. 
- * - * wayback is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * any later version. - * - * wayback is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser Public License for more details. - * - * You should have received a copy of the GNU Lesser Public License - * along with wayback; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -package org.archive.wayback.archivalurl; - -import junit.framework.TestCase; - -/** - * - * - * @author brad - * @version $Date$, $Revision$ - */ -public class TagMagixTest extends TestCase { - - // snipped and modified from http://www.sudaneseonline.com/ on 20070418... - // note: leading space in description META content - // note: added newlines in Content-Language META tag - // note: no quotes around Author META content - - String thePage = "<html>\n" + - "<head>\n" + - "<meta http-equiv=\"Content-Language\" \n content=\"ar-eg\">\n" + - "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + - "<meta name=\"resource-type\" content=\"document\">\n" + - "<meta name=\"classification\" content=\"News\">\n" + - "<meta name=\"test1234\" content=\"one\ntwo\">\n" + - "<meta name=\"description\" content=\" A voice of the Sudan people on the Internet\">\n" + - - "<meta http-equiv=\"Content-Language\" \n content=\"ar-sa\">\n" + - "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + - "<META NAME=\"Author\" CONTENT=Bakri Abubakr http://bayanit.com/>\n" + - "<META NAME=\"Author2\" CONTENT=\"Bakri Abubakr http://bayanit.com/\">\n" + - "</head>\n" + - "<body>foo</body>\n" + - "</html>\n"; - - /** - * Tests the code that finds 
attribute values in tags - */ - public void testFindAttr() { - - checkAttrValue(thePage,"meta","http-equiv","Content-Language"); - } - /** - * - */ - public void testFindAttrWhere() { - checkAttrWhereValue(thePage,"meta","content","http-equiv", - "Content-Type","text/html; charset=windows-1256"); - - checkAttrWhereValue(thePage,"meta","content","http-equiv", - "Content-Language","ar-eg"); - - checkAttrWhereValue(thePage,"meta","content","name", - "classification","News"); - - checkAttrWhereValue(thePage,"meta","content","name", - "test1234","one\ntwo"); - - checkAttrWhereValue(thePage,"meta","content","name", - "ClAsSification","News"); - - checkAttrWhereValue(thePage,"meta","content","name", - "description"," A voice of the Sudan people on the Internet"); - - checkAttrWhereValue(thePage,"meta","content","name", - "description-no-existo",null); - - checkAttrWhereValue(thePage,"meta","content","name", - "author","Bakri"); - - checkAttrWhereValue(thePage,"meta","content","name", - "author2","Bakri Abubakr http://bayanit.com/"); - } - - - /** - * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' - */ - public void testMarkupTag() { - - - // simple simple -- no quotes at all - checkMarkup( - "<A HREF=http://goofy.com/>", - "<A HREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // same test with lower case - checkMarkup( - "<a href=http://goofy.com/>", - "<a href=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with funky mixed case - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // more funky mixed case, this time in the attribute to replace argument - 
checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "A","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // another funky case permutation, this time in the tagname to replace - checkMarkup( - "<a hREF=http://goofy.com/>", - "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", - "a","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // with double quotes - checkMarkup( - "<A HREF=\"http://goofy.com/\">", - "<A HREF=\"http://web.archive.org/wayback/2004/http://goofy.com/\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // single quotes - checkMarkup( - "<A HREF='http://goofy.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags - checkMarkup( - "<A HREF='http://goofy.com/'><A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags with newline: - checkMarkup( - "<A HREF='http://goofy.com/'>\n<A HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>\n<A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - - // two tags in "page" but only asking to update one of them - checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><B HREF='http://goofier.com/'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // two tags, asking to update the other. 
- checkMarkup( - "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", - "<A HREF='http://goofy.com/'><B HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", - "B","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple path relative - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // simple server relative but irrelavant -- still at top level - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); - - // server relative but with non directory base url - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // server relative being significant - checkMarkup( - "<A HREF='/index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // path relative with non-directory base url - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); - - // path relative in subdirectory - checkMarkup( - "<A HREF='index.html'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/dir/index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no closing apos) - checkMarkup( - "<A HREF='index.html>", - "<A HREF='index.html>", - 
"A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF='index.html\">", - "<A HREF='index.html\">", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // same as last, but reversed: don't touch a "malformed" attribute (no differing quotes around attribute.) - checkMarkup( - "<A HREF=\"index.html'>", - "<A HREF=\"index.html'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newline in attribute - checkMarkup( - "<A HREF='/index.html'\n FOO='bar'>", - "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'\n FOO='bar'>", - "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute - checkMarkup( - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\">", - "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\">", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case - checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); - - // newlines in attribute, plus extra, diff case, no protocol - 
checkMarkup( - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", - "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://archive.org/_style/style.css\"></b>", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://archive.org/dir/"); - - // Javascript escaped quote attribute: - checkMarkup( - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"/css/print.css\\\" />\");", - "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"http://web.archive.org/wayback/2004/http://boogle.org/css/print.css\\\" />\");", - "LINK","HREF","http://web.archive.org/wayback/","2004","http://boogle.org/dir/"); - - - } - - private void checkAttrValue(String page, String tag, String attr, - String wantValue) { - StringBuilder sb = new StringBuilder(page); - String foundValue = TagMagix.getTagAttr(sb, tag, attr); - assertEquals(foundValue,wantValue); - } - private void checkAttrWhereValue(String page, String tag, String attr, - String whereAttr, String whereVal, String wantValue) { - StringBuilder sb = new StringBuilder(page); - String foundValue = TagMagix.getTagAttrWhere(sb, tag, attr, whereAttr,whereVal); - if(foundValue != null) { - assertEquals(foundValue,wantValue); - } else { - assertNull(wantValue); - } - } - - private void checkMarkup(String orig, String want, String tag, String attr, String prefix, String ts, String url) { - StringBuilder buf = new StringBuilder(orig); -// if(url.startsWith("http://")) { -// url = url.substring(7); -// } - ArchivalUrlResultURIConverter uriC = new ArchivalUrlResultURIConverter(); - uriC.setReplayURIPrefix(prefix); - TagMagix.markupTagREURIC(buf,uriC,ts,url,tag,attr); - String marked = buf.toString(); - assertEquals(want,marked); - } -} Copied: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java (from rev 1874, 
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/archivalurl/TagMagixTest.java) =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-07-26 21:53:47 UTC (rev 1895) @@ -0,0 +1,296 @@ +/* TagMagixTest + * + * $Id$ + * + * Created on 6:36:07 PM Feb 14, 2006. + * + * Copyright (C) 2006 Internet Archive. + * + * This file is part of wayback. + * + * wayback is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License + * along with wayback; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.replay; + +import org.archive.wayback.replay.TagMagix; +import org.archive.wayback.archivalurl.ArchivalUrlResultURIConverter; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class TagMagixTest extends TestCase { + + // snipped and modified from http://www.sudaneseonline.com/ on 20070418... 
+ // note: leading space in description META content + // note: added newlines in Content-Language META tag + // note: no quotes around Author META content + + String thePage = "<html>\n" + + "<head>\n" + + "<meta http-equiv=\"Content-Language\" \n content=\"ar-eg\">\n" + + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + + "<meta name=\"resource-type\" content=\"document\">\n" + + "<meta name=\"classification\" content=\"News\">\n" + + "<meta name=\"test1234\" content=\"one\ntwo\">\n" + + "<meta name=\"description\" content=\" A voice of the Sudan people on the Internet\">\n" + + + "<meta http-equiv=\"Content-Language\" \n content=\"ar-sa\">\n" + + "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=windows-1256\">\n" + + "<META NAME=\"Author\" CONTENT=Bakri Abubakr http://bayanit.com/>\n" + + "<META NAME=\"Author2\" CONTENT=\"Bakri Abubakr http://bayanit.com/\">\n" + + "</head>\n" + + "<body>foo</body>\n" + + "</html>\n"; + + /** + * Tests the code that finds attribute values in tags + */ + public void testFindAttr() { + + checkAttrValue(thePage,"meta","http-equiv","Content-Language"); + } + /** + * + */ + public void testFindAttrWhere() { + checkAttrWhereValue(thePage,"meta","content","http-equiv", + "Content-Type","text/html; charset=windows-1256"); + + checkAttrWhereValue(thePage,"meta","content","http-equiv", + "Content-Language","ar-eg"); + + checkAttrWhereValue(thePage,"meta","content","name", + "classification","News"); + + checkAttrWhereValue(thePage,"meta","content","name", + "test1234","one\ntwo"); + + checkAttrWhereValue(thePage,"meta","content","name", + "ClAsSification","News"); + + checkAttrWhereValue(thePage,"meta","content","name", + "description"," A voice of the Sudan people on the Internet"); + + checkAttrWhereValue(thePage,"meta","content","name", + "description-no-existo",null); + + checkAttrWhereValue(thePage,"meta","content","name", + "author","Bakri"); + + 
checkAttrWhereValue(thePage,"meta","content","name", + "author2","Bakri Abubakr http://bayanit.com/"); + } + + + /** + * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' + */ + public void testMarkupTag() { + + + // simple simple -- no quotes at all + checkMarkup( + "<A HREF=http://goofy.com/>", + "<A HREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // same test with lower case + checkMarkup( + "<a href=http://goofy.com/>", + "<a href=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // with funky mixed case + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // more funky mixed case, this time in the attribute to replace argument + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "A","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // another funky case permutation, this time in the tagname to replace + checkMarkup( + "<a hREF=http://goofy.com/>", + "<a hREF=http://web.archive.org/wayback/2004/http://goofy.com/>", + "a","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // with double quotes + checkMarkup( + "<A HREF=\"http://goofy.com/\">", + "<A HREF=\"http://web.archive.org/wayback/2004/http://goofy.com/\">", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // single quotes + checkMarkup( + "<A HREF='http://goofy.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags + 
checkMarkup( + "<A HREF='http://goofy.com/'><A HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags with newline: + checkMarkup( + "<A HREF='http://goofy.com/'>\n<A HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'>\n<A HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + + // two tags in "page" but only asking to update one of them + checkMarkup( + "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", + "<A HREF='http://web.archive.org/wayback/2004/http://goofy.com/'><B HREF='http://goofier.com/'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // two tags, asking to update the other. + checkMarkup( + "<A HREF='http://goofy.com/'><B HREF='http://goofier.com/'>", + "<A HREF='http://goofy.com/'><B HREF='http://web.archive.org/wayback/2004/http://goofier.com/'>", + "B","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // simple path relative + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // simple server relative but irrelavant -- still at top level + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/"); + + // server relative but with non directory base url + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + 
"A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); + + // server relative being significant + checkMarkup( + "<A HREF='/index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // path relative with non-directory base url + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir"); + + // path relative in subdirectory + checkMarkup( + "<A HREF='index.html'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/dir/index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // don't touch a "malformed" attribute (no closing apos) + checkMarkup( + "<A HREF='index.html>", + "<A HREF='index.html>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // don't touch a "malformed" attribute (no differing quotes around attribute.) + checkMarkup( + "<A HREF='index.html\">", + "<A HREF='index.html\">", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // same as last, but reversed: don't touch a "malformed" attribute (no differing quotes around attribute.) 
+ checkMarkup( + "<A HREF=\"index.html'>", + "<A HREF=\"index.html'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newline in attribute + checkMarkup( + "<A HREF='/index.html'\n FOO='bar'>", + "<A HREF='http://web.archive.org/wayback/2004/http://www.archive.org/index.html'\n FOO='bar'>", + "A","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute + checkMarkup( + "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\">", + "<link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\">", + "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", + "link","href","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra, diff case + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://www.archive.org/_style/style.css\"></b>", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://www.archive.org/dir/"); + + // newlines in attribute, plus extra, diff case, no protocol + checkMarkup( + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"/_style/style.css\"></b>", + "<b><link rel=\"stylesheet\"\n goo=\"1\"\n href=\"http://web.archive.org/wayback/2004/http://archive.org/_style/style.css\"></b>", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://archive.org/dir/"); + + // Javascript escaped quote attribute: + checkMarkup( + "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" 
href=\\\"/css/print.css\\\" />\");", + "document.write(\"<link rel=\\\"stylesheet\\\" type=\\\"text/css\\\" href=\\\"http://web.archive.org/wayback/2004/http://boogle.org/css/print.css\\\" />\");", + "LINK","HREF","http://web.archive.org/wayback/","2004","http://boogle.org/dir/"); + + + } + + private void checkAttrValue(String page, String tag, String attr, + String wantValue) { + StringBuilder sb = new StringBuilder(page); + String foundValue = TagMagix.getTagAttr(sb, tag, attr); + assertEquals(foundValue,wantValue); + } + private void checkAttrWhereValue(String page, String tag, String attr, + String whereAttr, String whereVal, String wantValue) { + StringBuilder sb = new StringBuilder(page); + String foundValue = TagMagix.getTagAttrWhere(sb, tag, attr, whereAttr,whereVal); + if(foundValue != null) { + assertEquals(foundValue,wantValue); + } else { + assertNull(wantValue); + } + } + + private void checkMarkup(String orig, String want, String tag, String attr, String prefix, String ts, String url) { + StringBuilder buf = new StringBuilder(orig); +// if(url.startsWith("http://")) { +// url = url.substring(7); +// } + ArchivalUrlResultURIConverter uriC = new ArchivalUrlResultURIConverter(); + uriC.setReplayURIPrefix(prefix); + TagMagix.markupTagREURIC(buf,uriC,ts,url,tag,attr); + String marked = buf.toString(); + assertEquals(want,marked); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-08-23 21:35:08
|
Revision: 1927 http://archive-access.svn.sourceforge.net/archive-access/?rev=1927&view=rev Author: bradtofel Date: 2007-08-23 14:35:08 -0700 (Thu, 23 Aug 2007) Log Message: ----------- INITIAL REV: simple interface for external applications to leverage some of the Wayback Exclusion capabilities. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/ExternalExcluderTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/accesscontrol/ExternalExcluder.java 2007-08-23 21:35:08 UTC (rev 1927) @@ -0,0 +1,108 @@ +/* ExternalExcluder + * + * $Id$ + * + * Created on 2:33:37 PM Aug 21, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol; + +import org.apache.commons.httpclient.URIException; +import org.archive.net.LaxURI; +import org.archive.wayback.WaybackConstants; +import org.archive.wayback.core.SearchResult; +import org.archive.wayback.core.WaybackRequest; +import org.archive.wayback.util.ObjectFilter; +import org.springframework.beans.factory.xml.XmlBeanFactory; +import org.springframework.core.io.FileSystemResource; +import org.springframework.core.io.Resource; + +/** + * Class which simplifies usage of wayback robots and static map exclusion + * policies and software in external applications. + * + * Uses Spring to construct an ExclusionFilterFactory which handles requests. + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ExternalExcluder { + private static ExclusionFilterFactory factory = null; + private ObjectFilter<SearchResult> filter = null; + private final static String CONFIG_ID = "excluder-factory"; + /** + * @param filter + */ + public ExternalExcluder(ObjectFilter<SearchResult> filter) { + this.filter = filter; + } + /** + * @param urlString + * @param timestamp + * @return true if the url-timestamp should not be shown to end users + */ + public boolean isExcluded(String urlString, String timestamp) { + SearchResult sr = new SearchResult(); + + LaxURI url = null; + String host = null; + try { + url = new LaxURI(urlString,true); + host = url.getHost(); + } catch (URIException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + return true; + } + sr.put(WaybackConstants.RESULT_ORIG_HOST, host); + sr.put(WaybackConstants.RESULT_URL, urlString); + + int ruling = filter.filterObject(sr); + return (ruling != ObjectFilter.FILTER_INCLUDE); + } + private static synchronized 
ExclusionFilterFactory getFactory(String + configPath) { + if(factory != null) { + return factory; + } + Resource resource = new FileSystemResource(configPath); + XmlBeanFactory xmlFactory = new XmlBeanFactory(resource); + factory = (ExclusionFilterFactory) xmlFactory.getBean(CONFIG_ID); + return factory; + } + + /** + * @param configPath + * @return an excluder fully configured via the XML Spring configuration + * at configPath + */ + public static ExternalExcluder getExcluder(String configPath) { + WaybackRequest wbRequest = null; + return new ExternalExcluder(getFactory(configPath).get(wbRequest)); + } + /** + * shutdown underlying resources. + */ + public static synchronized void shutdown() { + if(factory != null) { + factory.shutdown(); + } + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/ExternalExcluderTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/ExternalExcluderTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/accesscontrol/ExternalExcluderTest.java 2007-08-23 21:35:08 UTC (rev 1927) @@ -0,0 +1,51 @@ +/* ExternalExcluderTest + * + * $Id$ + * + * Created on 4:01:25 PM Aug 21, 2007. + * + * Copyright (C) 2007 Internet Archive. + * + * This file is part of wayback-core. + * + * wayback-core is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * any later version. + * + * wayback-core is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser Public License + * along with wayback-core; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.wayback.accesscontrol; + +import junit.framework.TestCase; + +/** + * + * + * @author brad + * @version $Date$, $Revision$ + */ +public class ExternalExcluderTest extends TestCase { + String configPath = "/tmp/external.xml"; + /** + * + */ + public void testIt() { + ExternalExcluder excluder = ExternalExcluder.getExcluder(configPath); + //assertFalse(excluder.isExcluded("http://archive.torg/", "20050202020202")); + assertTrue(excluder.isExcluded("http://archive.org/cgi-bin/test.html", "20050202020202")); + assertTrue(excluder.isExcluded("http://www.archive.org/cgi-bin/test.html", "20050202020202")); + assertFalse(excluder.isExcluded("http://www.archive.org/cgi-brin/test.html", "20050202020202")); + assertFalse(excluder.isExcluded("http://archive.org/cgi-brin/test.html", "20050202020202")); + assertFalse(excluder.isExcluded("http://archive.com/cgi-brin/test.html", "20050202020202")); + assertTrue(excluder.isExcluded("http://archives.com/cgi-brin/test.html", "20050202020202")); + ExternalExcluder.shutdown(); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-18 23:36:52
|
Revision: 1991 http://archive-access.svn.sourceforge.net/archive-access/?rev=1991&view=rev Author: bradtofel Date: 2007-09-18 16:36:55 -0700 (Tue, 18 Sep 2007) Log Message: ----------- INITIAL REV: code to assemble arbitrary logic structures. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/AndBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BinaryBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/FalseBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/NotBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/OrBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/TrueBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/UnaryBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/Utils.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/operator/ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/operator/BooleanOperatorTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/AndBooleanOperator.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/AndBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/AndBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,11 @@ +package org.archive.wayback.util.operator; + + +public class AndBooleanOperator<E> extends BinaryBooleanOperator<E> { + public boolean isTrue(E value) { + if(operand1 == null) return false; + if(operand2 == null) return false; + return operand1.isTrue(value) && operand2.isTrue(value); + } +} + Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BinaryBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BinaryBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BinaryBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,19 @@ +package org.archive.wayback.util.operator; + + +public abstract class BinaryBooleanOperator<E> implements BooleanOperator<E> { + protected BooleanOperator<E> operand1 = null; + protected BooleanOperator<E> operand2 = null; + public BooleanOperator<E> getOperand1() { + return operand1; + } + public void setOperand1(BooleanOperator<E> operand1) { + this.operand1 = operand1; + } + public BooleanOperator<E> getOperand2() { + return operand2; + } + public void setOperand2(BooleanOperator<E> operand2) { + this.operand2 = operand2; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BooleanOperator.java (rev 0) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/BooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,5 @@ +package org.archive.wayback.util.operator; + +public interface BooleanOperator<E> { + public boolean isTrue(E value); +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/FalseBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/FalseBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/FalseBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,8 @@ +package org.archive.wayback.util.operator; + + +public class FalseBooleanOperator<E> implements BooleanOperator<E> { + public boolean isTrue(E value) { + return false; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/NotBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/NotBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/NotBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,8 @@ +package org.archive.wayback.util.operator; + + +public class NotBooleanOperator<E> extends UnaryBooleanOperator<E> { + public boolean isTrue(E value) { + return !operand.isTrue(value); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/OrBooleanOperator.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/OrBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/OrBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,10 @@ +package org.archive.wayback.util.operator; + +public class OrBooleanOperator<E> extends BinaryBooleanOperator<E> { + + public boolean isTrue(E value) { + if(operand1 == null) return false; + if(operand2 == null) return false; + return operand1.isTrue(value) || operand2.isTrue(value); + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/TrueBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/TrueBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/TrueBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,8 @@ +package org.archive.wayback.util.operator; + + +public class TrueBooleanOperator<E> implements BooleanOperator<E> { + public boolean isTrue(E value) { + return true; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/UnaryBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/UnaryBooleanOperator.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/UnaryBooleanOperator.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,12 @@ +package org.archive.wayback.util.operator; + + +public abstract class UnaryBooleanOperator<E> implements BooleanOperator<E> { + protected BooleanOperator<E> operand = null; + public 
BooleanOperator<E> getOperand() { + return operand; + } + public void setOperand(BooleanOperator<E> operand) { + this.operand = operand; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/Utils.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/Utils.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/operator/Utils.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,23 @@ +package org.archive.wayback.util.operator; + +import java.util.ArrayList; +import java.util.List; + +public class Utils { + public static <T> List<BooleanOperator<T>> getOperators(BooleanOperator<T> top) { + ArrayList<BooleanOperator<T>> operators = new ArrayList<BooleanOperator<T>>(); + ArrayList<BooleanOperator<T>> toInspect = new ArrayList<BooleanOperator<T>>(); + toInspect.add(top); + while(toInspect.size() > 0) { + BooleanOperator<T> current = toInspect.remove(0); + operators.add(current); + if(current instanceof UnaryBooleanOperator) { + toInspect.add(((UnaryBooleanOperator<T>)current).getOperand()); + } else if(current instanceof BinaryBooleanOperator) { + toInspect.add(((BinaryBooleanOperator<T>)current).getOperand1()); + toInspect.add(((BinaryBooleanOperator<T>)current).getOperand2()); + } + } + return operators; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/operator/BooleanOperatorTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/operator/BooleanOperatorTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/operator/BooleanOperatorTest.java 2007-09-18 23:36:55 UTC (rev 1991) @@ -0,0 +1,41 @@ +package 
org.archive.wayback.util.operator; + +import junit.framework.TestCase; + +public class BooleanOperatorTest extends TestCase { + + public void testAll() { + FalseBooleanOperator<Object> fbo = new FalseBooleanOperator<Object>(); + TrueBooleanOperator<Object> tbo = new TrueBooleanOperator<Object>(); + NotBooleanOperator<Object> nbo = new NotBooleanOperator<Object>(); + OrBooleanOperator<Object> obo = new OrBooleanOperator<Object>(); + AndBooleanOperator<Object> abo = new AndBooleanOperator<Object>(); + + assertFalse(fbo.isTrue(null)); + assertTrue(tbo.isTrue(null)); + nbo.setOperand(fbo); + assertTrue(nbo.isTrue(null)); + nbo.setOperand(tbo); + assertFalse(nbo.isTrue(null)); + obo.setOperand1(fbo); + obo.setOperand2(tbo); + assertTrue(obo.isTrue(null)); + + abo.setOperand1(fbo); + abo.setOperand2(tbo); + assertFalse(abo.isTrue(null)); + + abo.setOperand1(tbo); + assertTrue(abo.isTrue(null)); + + obo.setOperand2(fbo); + assertFalse(obo.isTrue(null)); + + abo.setOperand1(nbo); + nbo.setOperand(fbo); + assertTrue(abo.isTrue(null)); + + + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-09-18 23:38:28
|
Revision: 1992 http://archive-access.svn.sourceforge.net/archive-access/?rev=1992&view=rev Author: bradtofel Date: 2007-09-18 16:38:30 -0700 (Tue, 18 Sep 2007) Log Message: ----------- INITIAL REV: this code must exist somewhere else, but here it is again, and not very pretty or optimized, but this parses an IP or IP/netbits form network and then answers queries to determine if a given test IP is within the range. Added Paths: ----------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/IPRange.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/IPRangeTest.java Added: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/IPRange.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/IPRange.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/IPRange.java 2007-09-18 23:38:30 UTC (rev 1992) @@ -0,0 +1,181 @@ +package org.archive.wayback.util; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class IPRange { + + // STATIC MEMBERS: + private final static Pattern IP_PATTERN = + Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)\\.(\\d+)"); + private final static Pattern IP_MASK_PATTERN = + Pattern.compile("(\\d+\\.\\d+\\.\\d+\\.\\d+)/(\\d+)"); + private final static byte[] FULL_MASK = + {(byte)0xff,(byte)0xff,(byte)0xff,(byte)0xff}; + + private final static byte[] flags = { + (byte) 0x80, + (byte) 0x40, + (byte) 0x20, + (byte) 0x10, + (byte) 0x08, + (byte) 0x04, + (byte) 0x02, + (byte) 0x01, + }; + + // INSTANCE MEMBERS: + private byte[] ip = null; + private byte[] mask = null; + + // INSTANCE METHODS: + public byte[] getIp() { + return ip; + } + + public byte[] getMask() { + return mask; + } + + public boolean contains(String ipString) { + byte[] testIP = 
matchIP(ipString); + if(testIP == null) { + return false; + } + return contains(testIP); + } + + public boolean contains(byte[] testIP) { + byte[] masked = and(testIP,mask); + return equals(ip,masked); + } + + public String getRangeString() { + return null; + } + public void setRangeString(String range) { + setRange(range); + } + + public boolean setRange(String range) { + Matcher m = IP_MASK_PATTERN.matcher(range); + if(m != null) { + if(m.matches()) { + return setRangeMask(m.group(1),m.group(2)); + } + } + return setRangeIP(range); + } + + // PRIVATE INSTANCE METHODS: + private boolean setRangeMask(String ipString, String maskBitsString) { + byte[] tmpMask = maskBits(maskBitsString); + if(tmpMask != null) { + if(setRangeIP(ipString)) { + mask = tmpMask; + ip = and(ip,mask); + return true; + } + } + return false; + } + private boolean setRangeIP(String ipString) { + byte[] tmpIp = matchIP(ipString); + if(tmpIp != null) { + ip = tmpIp; + mask = FULL_MASK; + return true; + } + return false; + } + + // STATIC METHODS: + public static byte[] maskBits(String bitsString) { + try { + int bits = Integer.parseInt(bitsString); + return maskBits(bits); + } catch(NumberFormatException e) { + e.printStackTrace(); + } + return null; + } + public static byte[] maskBits(int bits) { + byte[] res = new byte[4]; + if(bits < 0) { + return null; + } + if(bits > 32) { + return null; + } + for(int i=0; i < 4; i++) { + + int startBit = 8 * i; + int endBit = 8 * (i+1); + if(bits < startBit) { + res[i] = (byte)0x00; + } else if(bits >= endBit) { + res[i] = (byte)0xff; + } else { + int numOn = bits - startBit; + int val = 0x00; + for(int j=0; j < numOn; j++) { + val |= flags[j]; + } + res[i] = (byte) val; + } + } + return res; + } + public static String bitString(byte b) { + StringBuilder sb = new StringBuilder(8); + for(int i=0; i<8; i++) { + sb.append(((b & flags[i])==0)?"0":"1"); + } + return sb.toString(); + } + + public static byte[] and(byte b1[], byte b2[]) { + byte[] res = new 
byte[4]; + for(int i=0; i<4; i++) { + res[i] = (byte) ((byte) b1[i] & (byte) b2[i]); + } + return res; + } + public static boolean equals(byte b1[], byte b2[]) { + for(int i=0; i<4; i++) { + if(b1[i] != b2[i]) { + return false; + } + } + return true; + } + public static boolean isOn(byte b, int pos) { + return (b & flags[pos]) != 0; + } + + public static byte[] matchIP(String ip) { + Matcher m = IP_PATTERN.matcher(ip); + if(m != null) { + if(m.matches()) { + try { + byte[] res = new byte[4]; + for(int i=0; i < 4; i++) { + int testInt = Integer.parseInt(m.group(i+1)); + if(testInt < 0) { + return null; + } + if(testInt > 255) { + return null; + } + res[i] = (byte) testInt; + } + return res; + } catch(NumberFormatException e) { + e.printStackTrace(); + return null; + } + } + } + return null; + } +} Added: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/IPRangeTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/IPRangeTest.java (rev 0) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/IPRangeTest.java 2007-09-18 23:38:30 UTC (rev 1992) @@ -0,0 +1,191 @@ +package org.archive.wayback.util; + +import junit.framework.TestCase; + +public class IPRangeTest extends TestCase { + public void testBitOn() { + byte b = (byte) 0xFF; + for(int i=0; i<8; i++) { + assertTrue(IPRange.isOn(b, i)); + } + b = (byte) 0x80; + assertTrue(IPRange.isOn(b, 0)); + for(int i=1; i<8; i++) { + assertFalse(IPRange.isOn(b, i)); + } + b = (byte) 0xc0; + assertTrue(IPRange.isOn(b, 0)); + assertTrue(IPRange.isOn(b, 1)); + for(int i=2; i<8; i++) { + assertFalse(IPRange.isOn(b, i)); + } + b |= 0x01; + assertTrue(IPRange.isOn(b, 0)); + assertTrue(IPRange.isOn(b, 1)); + assertTrue(IPRange.isOn(b, 7)); + for(int i=2; i<7; i++) { + assertFalse(IPRange.isOn(b, i)); + } + + b = (byte) 0xf0 & (byte) 0x00; + 
for(int i=0; i<8; i++) { + assertFalse(IPRange.isOn(b, i)); + } + + b = (byte) 0xf0 & (byte) 0x80; + assertTrue(IPRange.isOn(b, 0)); + for(int i=1; i<8; i++) { + assertFalse(IPRange.isOn(b, i)); + } + } + public void testBitString() { + assertEquals("00000000",IPRange.bitString((byte)0x00)); + assertEquals("11111111",IPRange.bitString((byte)0xff)); + assertEquals("11110000",IPRange.bitString((byte)0xf0)); + assertEquals("11100000",IPRange.bitString((byte)0xe0)); + assertEquals("11000000",IPRange.bitString((byte)0xc0)); + assertEquals("11001100",IPRange.bitString((byte)0xcc)); + assertEquals("11001101",IPRange.bitString((byte)0xcd)); + assertEquals("00010000",IPRange.bitString((byte)0x10)); + assertEquals("00010001",IPRange.bitString((byte)0x11)); + } + + public void testMask() { + byte[] b = IPRange.maskBits(0); + assertEquals("00000000",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + b = IPRange.maskBits(1); + assertEquals("10000000",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + b = IPRange.maskBits(2); + assertEquals("11000000",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + b = IPRange.maskBits(9); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("10000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + b = IPRange.maskBits(23); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111110",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + b = 
IPRange.maskBits(30); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111100",IPRange.bitString(b[3])); + + b = IPRange.maskBits(31); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111110",IPRange.bitString(b[3])); + + b = IPRange.maskBits(32); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111111",IPRange.bitString(b[3])); + + } + public void testParse() { + IPRange r = new IPRange(); + + assertFalse(r.setRange("127.0.0.")); + assertFalse(r.setRange("256.0.0.1")); + assertFalse(r.setRange("0.256.0.0.1")); + assertFalse(r.setRange("0.256.0.0")); + assertFalse(r.setRange("0.0.0.256")); + + assertTrue(r.setRange("127.0.0.1")); + assertTrue(r.setRange("255.0.0.0")); + assertTrue(r.setRange("0.0.0.255")); + assertTrue(r.setRange("127.127.127.127")); + assertTrue(r.setRange("127.127.127.255")); + + assertTrue(r.setRange("128.0.0.0")); + byte[] b = r.getMask(); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111111",IPRange.bitString(b[3])); + b = r.getIp(); + assertEquals("10000000",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + assertTrue(r.setRange("129.0.0.0")); + b = r.getMask(); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111111",IPRange.bitString(b[3])); + b = r.getIp(); + 
assertEquals("10000001",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + assertTrue(r.setRange("129.0.0.0/30")); + b = r.getMask(); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("11111100",IPRange.bitString(b[3])); + b = r.getIp(); + assertEquals("10000001",IPRange.bitString(b[0])); + assertEquals("00000000",IPRange.bitString(b[1])); + assertEquals("00000000",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + + assertTrue(r.setRange("129.129.129.129/24")); + b = r.getMask(); + assertEquals("11111111",IPRange.bitString(b[0])); + assertEquals("11111111",IPRange.bitString(b[1])); + assertEquals("11111111",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + b = r.getIp(); + assertEquals("10000001",IPRange.bitString(b[0])); + assertEquals("10000001",IPRange.bitString(b[1])); + assertEquals("10000001",IPRange.bitString(b[2])); + assertEquals("00000000",IPRange.bitString(b[3])); + } + + public void testContains() { + IPRange r = new IPRange(); + assertTrue(r.setRange("129.129.129.0/24")); + assertTrue(r.contains("129.129.129.129")); + assertTrue(r.contains("129.129.129.255")); + assertTrue(r.contains("129.129.129.0")); + assertFalse(r.contains("129.129.128.0")); + assertFalse(r.contains("129.129.128.255")); + + assertTrue(r.setRange("129.129.129.129/24")); + assertTrue(r.contains("129.129.129.129")); + assertTrue(r.contains("129.129.129.255")); + assertTrue(r.contains("129.129.129.0")); + assertFalse(r.contains("129.129.128.0")); + assertFalse(r.contains("129.129.128.255")); + + assertTrue(r.setRange("129.129.129.129/25")); + assertTrue(r.contains("129.129.129.128")); + assertTrue(r.contains("129.129.129.129")); + assertTrue(r.contains("129.129.129.255")); + 
assertFalse(r.contains("129.129.129.0")); + assertFalse(r.contains("129.129.128.0")); + assertFalse(r.contains("129.129.128.255")); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-12-15 02:00:47
|
Revision: 2115 http://archive-access.svn.sourceforge.net/archive-access/?rev=2115&view=rev Author: bradtofel Date: 2007-12-14 18:00:51 -0800 (Fri, 14 Dec 2007) Log Message: ----------- BUGFIX: (unreported) regex was not finding simple tags (ex: "<head>")... Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-12-12 03:34:13 UTC (rev 2114) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2007-12-15 02:00:51 UTC (rev 2115) @@ -100,7 +100,7 @@ Pattern pc = wholeTagPatterns.get(tagName); if (pc == null) { - String tagPatString = "<\\s*" + tagName + "\\s+[^>]+>"; + String tagPatString = "<\\s*" + tagName + "((>)|(\\s+[^>]*>))"; pc = Pattern.compile(tagPatString, Pattern.CASE_INSENSITIVE); wholeTagPatterns.put(tagName, pc); Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-12-12 03:34:13 UTC (rev 2114) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2007-12-15 02:00:51 UTC (rev 2115) @@ -98,7 +98,24 @@ "author2","Bakri Abubakr http://bayanit.com/"); } + public void testFindEndOfFirst() { + findEndOf("<head>","head",6); + findEndOf("<html><head><body>","head",12); + findEndOf("<html><head 
goo=bar><body>","head",20); + findEndOf("<html><head goo=bar><body>full","body",26); + findEndOf("<html><head goo=bar><body >full","body",27); + findEndOf("<html><head goo=bar><body >full","body",27); + findEndOf("<html><head goo=bar><body yar=bam>full","body",34); + findEndOf("<html><head goo=bar><body yar='bam'>full","body",36); + findEndOf("<html><head goo=bar><body yar=\"bam\">full","body",36); + } + public void findEndOf(String page, String tag, int offset) { + StringBuilder sb = new StringBuilder(page); + int found = TagMagix.getEndOfFirstTag(sb,tag); + assertEquals("FAILED find end of " +tag+ " in ("+page+")",offset,found); + } + /** * Test method for 'org.archive.wayback.archivalurl.TagMagix.markupTag(StringBuffer, String, String, String, String, String)' */ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2007-12-22 00:59:21
|
Revision: 2118 http://archive-access.svn.sourceforge.net/archive-access/?rev=2118&view=rev Author: bradtofel Date: 2007-12-21 16:59:25 -0800 (Fri, 21 Dec 2007) Log Message: ----------- FIX: problem with new apache httpclient.URI where "+"s now get escaped.. Also added a couple more tests. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/UrlCanonicalizerTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2007-12-15 02:02:43 UTC (rev 2117) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/UrlCanonicalizer.java 2007-12-22 00:59:25 UTC (rev 2118) @@ -221,11 +221,20 @@ searchUrl = "http://" + searchUrl; } + // unescape anythying that can be: + UURI tmpURI = UURIFactory.getInstance(searchUrl); + tmpURI.setPath(tmpURI.getPath()); + + // convert to UURI to perform require URI fixup: - UURI searchURI = UURIFactory.getInstance(searchUrl); + UURI searchURI = UURIFactory.getInstance(tmpURI.getURI()); + + + // replace ' ' with '+' (this is only to match Alexa's canonicalization) - String newPath = searchURI.getPath().replace(' ','+'); + String newPath = searchURI.getEscapedPath().replace("%20","+"); +// String newPath = searchURI.getPath().replace(' ','+'); // replace multiple consecutive '/'s in the path. 
while(newPath.contains("//")) { @@ -237,15 +246,30 @@ // if((newPath.length() > 1) && newPath.endsWith("/")) { // newPath = newPath.substring(0,newPath.length()-1); // } - searchURI.setPath(newPath); +// searchURI.setEscapedPath(newPath); +// searchURI.setRawPath(newPath.toCharArray()); +// String query = searchURI.getEscapedQuery(); // TODO: handle non HTTP port stripping, too. - String portStr = ""; +// String portStr = ""; +// if(searchURI.getPort() != 80 && searchURI.getPort() != -1) { +// portStr = ":" + searchURI.getPort(); +// } +// return searchURI.getHostBasename() + portStr + +// searchURI.getEscapedPathQuery(); + + StringBuilder sb = new StringBuilder(searchUrl.length()); + sb.append(searchURI.getHostBasename()); if(searchURI.getPort() != 80 && searchURI.getPort() != -1) { - portStr = ":" + searchURI.getPort(); + sb.append(":").append(searchURI.getPort()); } - return searchURI.getHostBasename() + portStr + - searchURI.getEscapedPathQuery(); + sb.append(newPath); + if(searchURI.getEscapedQuery() != null) { + sb.append("?").append(searchURI.getEscapedQuery()); + } + + + return sb.toString(); } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/UrlCanonicalizerTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/UrlCanonicalizerTest.java 2007-12-15 02:02:43 UTC (rev 2117) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/UrlCanonicalizerTest.java 2007-12-22 00:59:25 UTC (rev 2118) @@ -88,12 +88,17 @@ // do not add trailing '/' non-empty path and without protocol checkCanonicalization("foo.com/boo","foo.com/boo"); + + // TEST + // replace escaped ' ' with '+' in path plus keep trailing slash and query + checkCanonicalization("foo.com/pa%20th?a=b","foo.com/pa+th?a=b"); + // replace escaped ' ' with '+' in path 
checkCanonicalization("foo.com/pa%20th","foo.com/pa+th"); - // replace escaped ' ' with '+' in path plus kill trailing slash -// checkCanonicalization("foo.com/pa%20th/","foo.com/pa+th"); + // replace escaped ' ' with '+' in path plus leave trailing slash + checkCanonicalization("foo.com/pa%20th/","foo.com/pa+th/"); // replace multiple consecutive /'s in path checkCanonicalization("foo.com//goo","foo.com/goo"); @@ -104,11 +109,11 @@ // leave alone consecutive /'s after ? checkCanonicalization("foo.com/b?jar=//goo","foo.com/b?jar=//goo"); - // replace multiple consecutive /'s in path, plus kill trailing / -// checkCanonicalization("foo.com///goo/","foo.com/goo"); + // replace multiple consecutive /'s in path, plus leave trailing / + checkCanonicalization("foo.com///goo/","foo.com/goo/"); // replace escaped ' ' with '+' in path plus keep trailing slash and query - checkCanonicalization("foo.com/pa%20th?a=b","foo.com/pa+th?a=b"); + checkCanonicalization("foo.com/pa%20th/?a=b","foo.com/pa+th/?a=b"); // replace escaped ' ' with '+' in path but not in query key @@ -117,6 +122,23 @@ // replace escaped ' ' with '+' in path but not in query value checkCanonicalization("foo.com/pa%20th?a=b%20b","foo.com/pa+th?a=b%20b"); + + // no change in '!' escaping + checkCanonicalization("foo.com/pa!th","foo.com/pa!th"); + + // no change in '+' escaping + checkCanonicalization("foo.com/pa+th","foo.com/pa+th"); + + // unescape legal escaped '!' (%21) + checkCanonicalization("foo.com/pa%21th","foo.com/pa!th"); + + // leave '%' (%25) + checkCanonicalization("foo.com/pa%th","foo.com/pa%th"); + + // unescape '%' (%25) + checkCanonicalization("foo.com/pa%25th","foo.com/pa%th"); + + // replace escaped ' ' with '+' in path, unescape legal '!' in path // no change in query escaping checkCanonicalization("foo.com/pa%20t%21h?a%20a=b","foo.com/pa+t!h?a%20a=b"); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-02-01 23:53:53
|
Revision: 2170 http://archive-access.svn.sourceforge.net/archive-access/?rev=2170&view=rev Author: bradtofel Date: 2008-02-01 15:53:57 -0800 (Fri, 01 Feb 2008) Log Message: ----------- OPTIMIZ: two major optimizations, now holds URL to run regexs against in a StringBuilder, to reduce String Object construction overhead, and we now do a String compare against a "chooser" string before bothering to test the RegEx against the URLs. BUGFIX: fixed a couple of session ID stripper RegExes that were broken. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2008-02-01 19:34:06 UTC (rev 2169) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizer.java 2008-02-01 23:53:57 UTC (rev 2170) @@ -51,29 +51,40 @@ * Strip leading 'www.' */ private static final Pattern STRIP_WWW_REGEX = - Pattern.compile("(?i)^(https?://)(?:www\\.)([^/]*/.+)$"); + Pattern.compile("(?i)^(?:https?://)(www[0-9]*\\.)(?:[^/]*/.+)$"); + private static final String STRIP_WWW_CHOOSER = "/www"; +// /** +// * Strip leading 'www44.', 'www3.', etc. +// */ +// private static final Pattern STRIP_WWWN_REGEX = +// Pattern.compile("(?i)^(https?://)(?:www[0-9]+\\.)([^/]*/.+)$"); /** - * Strip leading 'www44.', 'www3.', etc. - */ - private static final Pattern STRIP_WWWN_REGEX = - Pattern.compile("(?i)^(https?://)(?:www[0-9]+\\.)([^/]*/.+)$"); - /** * Strip userinfo. 
*/ private static final Pattern STRIP_USERINFO_REGEX = - Pattern.compile("^((?:(?:https?)|(?:ftps?))://)(?:[^/]+@)(.*)$", + Pattern.compile("^(?:(?:(?:https?)|(?:ftps?))://)([^/]+@)(?:.*)$", Pattern.CASE_INSENSITIVE); + private static final String STRIP_USERINFO_CHOOSER = "@"; /** - * Example: jsessionid=999A9EF028317A82AC83F0FDFE59385A. * Example: PHPSESSID=9682993c8daa2c5497996114facdc805. */ - private static final Pattern STRIP_SESSION_ID_REGEX = - Pattern.compile("^(.+)(?:(?:(?:jsessionid)|(?:phpsessid))=" + - "[0-9a-zA-Z]{32})(?:&(.*))?$", + private static final Pattern STRIP_PHPSESSION_ID_REGEX = + Pattern.compile("^(?:.+)(phpsessid=" + + "[0-9a-zA-Z]{32}&?)(?:(?:.*))?$", Pattern.CASE_INSENSITIVE); + private static final String STRIP_PHPSESSION_ID_CHOOSER = "phpsessid="; + /** + * Example: jsessionid=999A9EF028317A82AC83F0FDFE59385A. + */ + private static final Pattern STRIP_JSESSION_ID_REGEX = + Pattern.compile("^.*(jsessionid=[0-9a-zA-Z]{32}&?).*$", + Pattern.CASE_INSENSITIVE); + private static final String STRIP_JSESSION_ID_CHOOSER = "jsessionid="; + + /** * Example: sid=9682993c8daa2c5497996114facdc805. * 'sid=' can be tricky but all sid= followed by 32 byte string * so far seen have been session ids. Sid is a 32 byte string @@ -81,16 +92,18 @@ * so have to have it run after the phpsessid elimination. */ private static final Pattern STRIP_SID_REGEX = - Pattern.compile("^(.+)" + - "(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", Pattern.CASE_INSENSITIVE); + Pattern.compile("^(?:.+)" + + "(sid=[0-9a-zA-Z]{32}&?)(?:(?:.*))?$", Pattern.CASE_INSENSITIVE); + private static final String STRIP_SID_CHOOSER = "sid="; /** * Example:ASPSESSIONIDAQBSDSRT=EOHBLBDDPFCLHKPGGKLILNAM. 
*/ private static final Pattern STRIP_ASPSESSION_REGEX = - Pattern.compile("^(.+)" + - "(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", + Pattern.compile("^(?:.+)" + + "(ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24}&?)(?:(?:.*))?$", Pattern.CASE_INSENSITIVE); + private static final String STRIP_ASPSESSION_CHOOSER = "aspsessionid"; /** * Examples: @@ -108,10 +121,10 @@ * */ private static final Pattern STRIP_ASPSESSION2_REGEX = - Pattern.compile("^([^\\?]+/)" + - "(?:\\((?:S\\(|)[0-9a-z]{24}\\)(?:\\)|)/)([^\\?]+\\.aspx.*)$", + Pattern.compile(".*/(\\([0-9a-z]{24}\\)/)(?:[^\\?]+\\.aspx.*)$", Pattern.CASE_INSENSITIVE); - + private static final String STRIP_ASPSESSION2_CHOOSER = ".aspx"; + /** * Examples: * @@ -123,12 +136,10 @@ * http://msdn2.microsoft.com/en-us/library/aa479315.aspx * */ - private static final Pattern STRIP_ASPSESSION3_REGEX = - Pattern.compile("^([^\\?]+/" + - "\\((?:a\\([0-9a-z]{24}\\)))(?:S\\([0-9a-z]{24}\\))" + - "((?:f\\([0-9a-z]{24}\\))\\)/[^\\?]+\\.aspx.*)$", + Pattern.compile(".*/(\\((?:[a-z]\\([0-9a-z]{24}\\))+\\)/)[^\\?]+\\.aspx.*$", Pattern.CASE_INSENSITIVE); + private static final String STRIP_ASPSESSION3_CHOOSER = ".aspx"; /** * Strip ColdFusion session IDs. 
Remove sessionids that look like the @@ -137,36 +148,52 @@ * CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A */ private static final Pattern STRIP_CFSESSION_REGEX = - Pattern.compile("^(.+)(?:cfid=[^&]+&cftoken=[^&]+(?:jsession=[^&]+)?)" + - "(?:&(.*))?$",Pattern.CASE_INSENSITIVE); + Pattern.compile(".+(cfid=[^&]+&cftoken=[^&]+(?:&jsessionid=[^&]+)?&?).*$", + Pattern.CASE_INSENSITIVE); + private static final String STRIP_CFSESSION_CHOOSER = "cftoken="; + + private static final String choosers[] = { + STRIP_USERINFO_CHOOSER, + STRIP_WWW_CHOOSER, + STRIP_PHPSESSION_ID_CHOOSER, + STRIP_JSESSION_ID_CHOOSER, + STRIP_ASPSESSION_CHOOSER, + STRIP_ASPSESSION2_CHOOSER, + STRIP_ASPSESSION3_CHOOSER, + STRIP_SID_CHOOSER, + STRIP_CFSESSION_CHOOSER + }; + private static final Pattern strippers[] = { + STRIP_USERINFO_REGEX, + STRIP_WWW_REGEX, + STRIP_PHPSESSION_ID_REGEX, + STRIP_JSESSION_ID_REGEX, + STRIP_ASPSESSION_REGEX, + STRIP_ASPSESSION2_REGEX, + STRIP_ASPSESSION3_REGEX, + STRIP_SID_REGEX, + STRIP_CFSESSION_REGEX + }; + /** - * Run a regex that strips elements of a string. + * Run a regex against a StringBuilder, removing group 1 if it matches. * * Assumes the regex has a form that wants to strip elements of the passed - * string. Assumes that if a match, appending group 1 - * and group 2 yields desired result. + * string. Assumes that if a match, group 1 should be removed * @param url Url to search in. - * @param matcher Matcher whose form yields a group 1 and group 2 if a - * match (non-null. - * @return Original <code>url</code> else concatenization of group 1 - * and group 2. + * @param matcher Matcher whose form yields a group to remove + * @return true if the StringBuilder was modified */ - protected String doStripRegexMatch(String url, Matcher matcher) { - return (matcher != null && matcher.matches())? 
- checkForNull(matcher.group(1)) + checkForNull(matcher.group(2)): - url; + protected boolean doStripRegexMatch(StringBuilder url, Matcher matcher) { + if(matcher != null && matcher.matches()) { + url.delete(matcher.start(1), matcher.end(1)); + return true; + } + return false; } /** - * @param string String to check. - * @return <code>string</code> if non-null, else empty string (""). - */ - private String checkForNull(String string) { - return (string != null)? string: ""; - } - - /** * return the canonical string key for the URL argument. * * @param urlString @@ -175,6 +202,9 @@ */ public String urlStringToKey(final String urlString) throws URIException { + if(urlString.startsWith("dns:")) { + return urlString; + } String searchUrl = canonicalize(urlString); // TODO: force https into http for the moment... @@ -195,20 +225,20 @@ searchUrl = "http://" + searchUrl; } - // unescape anythying that can be: + // TODO: These next few lines look crazy -- need to be reworked.. This + // was the only easy way I could find to get the correct unescaping + // out of UURIs, possible a bug. Definitely needs some TLC in any case, + // as building UURIs is *not* a cheap operation. + + // unescape anything that can be: UURI tmpURI = UURIFactory.getInstance(searchUrl); tmpURI.setPath(tmpURI.getPath()); - - // convert to UURI to perform require URI fixup: + // convert to UURI to perform required URI fixup: UURI searchURI = UURIFactory.getInstance(tmpURI.getURI()); - - - // replace ' ' with '+' (this is only to match Alexa's canonicalization) String newPath = searchURI.getEscapedPath().replace("%20","+"); -// String newPath = searchURI.getPath().replace(' ','+'); // replace multiple consecutive '/'s in the path. while(newPath.contains("//")) { @@ -241,12 +271,10 @@ if(searchURI.getEscapedQuery() != null) { sb.append("?").append(searchURI.getEscapedQuery()); } - return sb.toString(); } - /** * Idempotent operation that will determine the 'fuzziest' * form of the url argument. 
This operation is done prior to adding records @@ -259,19 +287,23 @@ * @return canonicalized version of url argument. */ public String canonicalize(String url) { - url = doStripRegexMatch(url, STRIP_USERINFO_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_WWW_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_WWWN_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_SESSION_ID_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_ASPSESSION_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_ASPSESSION2_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_ASPSESSION3_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_SID_REGEX.matcher(url)); - url = doStripRegexMatch(url, STRIP_CFSESSION_REGEX.matcher(url)); - url = url.toLowerCase(); + if (url == null || url.length() <= 0) { return url; } + + // hang on, we're about to get aggressive: + url = url.toLowerCase(); + StringBuilder sb = new StringBuilder(url); + boolean changed = false; + for(int i=0; i<choosers.length; i++) { + if(sb.indexOf(choosers[i]) != -1) { + changed |= doStripRegexMatch(sb,strippers[i].matcher(sb)); + } + } + if(changed) { + url = sb.toString(); + } int index = url.lastIndexOf('?'); if (index > 0) { @@ -285,8 +317,8 @@ url = url.substring(0, url.length() - 2); } else { // The '&' is redundant. Strip it. 
- url = url.substring(0, index + 1) + - url.substring(index + 2); + url = url.substring(0, index + 1) + + url.substring(index + 2); } } else if (url.charAt(url.length() - 1) == '&') { // If we have a lone '&' on end of query str, Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java 2008-02-01 19:34:06 UTC (rev 2169) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/AggressiveUrlCanonicalizerTest.java 2008-02-01 23:53:57 UTC (rev 2170) @@ -144,7 +144,7 @@ String sid3 = "sid=9682993c8daa2c5497996114facdc805"; String sid4 = "ASPSESSIONIDAQBSDSRT=EOHBLBDDPFCLHKPGGKLILNAM"; String sid5 = "CFID=12412453&CFTOKEN=15501799"; - //String sid6 = "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A"; + String sid6 = "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A"; String fore = "http://foo.com/bar?bo=lo&"; String aft = "&gum=yum"; @@ -158,7 +158,7 @@ checkCanonicalization(fore + sid3 + aft,want); checkCanonicalization(fore + sid4 + aft,want); checkCanonicalization(fore + sid5 + aft,want); - //checkCanonicalization(fore + sid6 + aft,want); + checkCanonicalization(fore + sid6 + aft,want); // Check ASP_SESSIONID2: checkCanonicalization( @@ -173,7 +173,7 @@ // Check ASP_SESSIONID3: checkCanonicalization( "http://legislature.mi.gov/(a(4hqa0555fwsecu455xqckv45)S(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules", - "legislature.mi.gov/(a(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules"); + "legislature.mi.gov/mileg.aspx?page=sessionschedules"); // strip port 80 checkCanonicalization("http://www.chub.org:80/foo","chub.org/foo"); This was sent by the 
SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-02-06 01:13:38
|
Revision: 2174 http://archive-access.svn.sourceforge.net/archive-access/?rev=2174&view=rev Author: bradtofel Date: 2008-02-05 17:13:42 -0800 (Tue, 05 Feb 2008) Log Message: ----------- BUGFIX: now calculates current(which translates to max) year on startup, which still will require a restart on New Years, but at least won't require more code changes.. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Timestamp.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/core/TimestampTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Timestamp.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Timestamp.java 2008-02-04 22:58:24 UTC (rev 2173) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/Timestamp.java 2008-02-06 01:13:42 UTC (rev 2174) @@ -43,7 +43,8 @@ private final static String LOWER_TIMESTAMP_LIMIT = "10000000000000"; private final static String UPPER_TIMESTAMP_LIMIT = "29991939295959"; private final static String YEAR_LOWER_LIMIT = "1996"; - private final static String YEAR_UPPER_LIMIT = "2008"; + private final static String YEAR_UPPER_LIMIT = + String.valueOf(Calendar.getInstance().get(Calendar.YEAR)); private final static String MONTH_LOWER_LIMIT = "01"; private final static String MONTH_UPPER_LIMIT = "12"; private final static String DAY_LOWER_LIMIT = "01"; Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/core/TimestampTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/core/TimestampTest.java 2008-02-04 22:58:24 UTC (rev 2173) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/core/TimestampTest.java 2008-02-06 01:13:42 UTC (rev 2174) @@ -24,6 +24,8 @@ */ package org.archive.wayback.core; +import java.util.Calendar; + import junit.framework.TestCase; import org.archive.wayback.core.Timestamp; @@ -40,11 +42,13 @@ */ public void testPadDateStr() { + String curYear = String.valueOf(Calendar.getInstance().get(Calendar.YEAR)); + assertEquals("padStart '1'","19960101000000",Timestamp.padStartDateStr("1")); assertEquals("padEnd '1'","19991231235959",Timestamp.padEndDateStr("1")); assertEquals("padStart '2'","20000101000000",Timestamp.padStartDateStr("2")); - assertEquals("padEnd","20081231235959",Timestamp.padEndDateStr("2")); - assertEquals("padEnd","20081231235959",Timestamp.padEndDateStr("3")); + assertEquals("padEnd",curYear + "1231235959",Timestamp.padEndDateStr("2")); + assertEquals("padEnd",curYear + "1231235959",Timestamp.padEndDateStr("3")); assertEquals("padEnd","20061231235959",Timestamp.padEndDateStr("2006")); assertEquals("padEnd","20061231235959",Timestamp.padEndDateStr("200613")); assertEquals("padEnd","20071231235959",Timestamp.padEndDateStr("2007")); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-04-15 00:01:03
|
Revision: 2239 http://archive-access.svn.sourceforge.net/archive-access/?rev=2239&view=rev Author: bradtofel Date: 2008-04-14 17:01:01 -0700 (Mon, 14 Apr 2008) Log Message: ----------- BUGFIX: was not updating import 'URL'; import "URL"; correctly Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-04-11 04:25:37 UTC (rev 2238) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-04-15 00:01:01 UTC (rev 2239) @@ -75,12 +75,18 @@ // "url\\s*\\(\\s*(['\"]?.+?['\"]?)\\s*\\)"; private static String cssUrlPatString = "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; - + private static String cssImportPatString = "@import\\s+" + cssUrlPatString; + private static String cssImportNoUrlPatString = + "@import\\s+([\"'].+?[\"'])"; + private static Pattern cssImportPattern = Pattern.compile(cssImportPatString); + + private static Pattern cssImportNoUrlPattern = + Pattern.compile(cssImportNoUrlPatString); private static Pattern cssUrlPattern = Pattern.compile(cssUrlPatString); @@ -156,6 +162,7 @@ ResultURIConverter uriConverter, String captureDate, String baseUrl) { markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportPattern); + markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportNoUrlPattern); } public static void markupStyleUrls(StringBuilder page, Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-04-11 04:25:37 UTC (rev 2238) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-04-15 00:01:01 UTC (rev 2239) @@ -327,8 +327,13 @@ "@import url(\r\n\"http://web.archive.org/wayback/2004/http://foo.com/f.css\"\n\r);", "http://web.archive.org/wayback/","2004","http://foo.com/"); + checkCSSMarkup("@import \"http://foo.com/f.css\";", + "@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\";", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + checkCSSMarkup("@import 'http://foo.com/f.css';", + "@import 'http://web.archive.org/wayback/2004/http://foo.com/f.css';", + "http://web.archive.org/wayback/","2004","http://foo.com/"); - } public void testStyleUrlMarkup() { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-06-24 21:42:41
|
Revision: 2302 http://archive-access.svn.sourceforge.net/archive-access/?rev=2302&view=rev Author: bradtofel Date: 2008-06-24 14:42:44 -0700 (Tue, 24 Jun 2008) Log Message: ----------- BUGFIX (ACC-21): now rewrites all(or at least more) url(XXX) tags in CSS. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-06-24 21:09:13 UTC (rev 2301) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/replay/TagMagix.java 2008-06-24 21:42:44 UTC (rev 2302) @@ -71,20 +71,12 @@ + APOSED_ATTR_VALUE + "|" + ESC_QUOTED_ATTR_VALUE + "|" + RAW_ATTR_VALUE; -// private static String cssUrlPatString = -// "url\\s*\\(\\s*(['\"]?.+?['\"]?)\\s*\\)"; private static String cssUrlPatString = "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; - private static String cssImportPatString = - "@import\\s+" + cssUrlPatString; - private static String cssImportNoUrlPatString = "@import\\s+([\"'].+?[\"'])"; - private static Pattern cssImportPattern = - Pattern.compile(cssImportPatString); - private static Pattern cssImportNoUrlPattern = Pattern.compile(cssImportNoUrlPatString); @@ -161,8 +153,9 @@ public static void markupCSSImports(StringBuilder page, ResultURIConverter uriConverter, String captureDate, String baseUrl) { - markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportPattern); +// markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportPattern); markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssImportNoUrlPattern); + 
markupTagREURIC(page,uriConverter,captureDate,baseUrl,cssUrlPattern); } public static void markupStyleUrls(StringBuilder page, Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-06-24 21:09:13 UTC (rev 2301) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/replay/TagMagixTest.java 2008-06-24 21:42:44 UTC (rev 2302) @@ -333,6 +333,26 @@ checkCSSMarkup("@import 'http://foo.com/f.css';", "@import 'http://web.archive.org/wayback/2004/http://foo.com/f.css';", "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import \"http://foo.com/f.css\"; @import url( http://foo.com/f.css);", + "@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\"; @import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import \"http://foo.com/f.css\";\n@import url( http://foo.com/f.css);", + "@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\";\n@import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("@import url( http://foo.com/f.css);\n@import \"http://foo.com/f.css\";", + "@import url( http://web.archive.org/wayback/2004/http://foo.com/f.css);\n@import \"http://web.archive.org/wayback/2004/http://foo.com/f.css\";", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + checkCSSMarkup("background: #9caad1 url('/~alabama/images/bg.jpg') 0 0 repeat-y;", + "background: #9caad1 url('http://web.archive.org/wayback/2004/http://foo.com/~alabama/images/bg.jpg') 0 0 repeat-y;", + "http://web.archive.org/wayback/","2004","http://foo.com/"); + + 
checkCSSMarkup("background: #9caad1 url('/~alabama/images/bg.jpg') 0 0 repeat-y;", + "background: #9caad1 url('http://web.archive.org/wayback/2004/http://foo.com/~alabama/images/bg.jpg') 0 0 repeat-y;", + "http://web.archive.org/wayback/","2004","http://foo.com/b/"); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-01 23:29:23
|
Revision: 2361 http://archive-access.svn.sourceforge.net/archive-access/?rev=2361&view=rev Author: bradtofel Date: 2008-07-01 16:29:31 -0700 (Tue, 01 Jul 2008) Log Message: ----------- FEATURE: added urlToHost() Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/util/url/UrlOperations.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -16,6 +16,29 @@ */ public class UrlOperations { + public final static String DNS_SCHEME = "dns:"; + public final static String HTTP_SCHEME = "http://"; + public final static String HTTPS_SCHEME = "https://"; + public final static String FTP_SCHEME = "ftp://"; + public final static String MMS_SCHEME = "mms://"; + public final static String RTSP_SCHEME = "rtsp://"; + // go brewster + public final static String WAIS_SCHEME = "wais://"; + + public final static String ALL_SCHEMES[] = { + HTTP_SCHEME, + HTTPS_SCHEME, + FTP_SCHEME, + MMS_SCHEME, + RTSP_SCHEME, + WAIS_SCHEME + }; + + + public final static char PORT_SEPARATOR = ':'; + public final static char PATH_START = '/'; + + private static final String CC_TLDS = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq" + "|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs" + "|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx" + @@ -73,4 +96,32 @@ } return resolvedURI.getEscapedURI(); } + + public static String urlToHost(String url) { + if(url.startsWith("dns:")) 
{ + return url.substring(4); + } + for(String scheme : ALL_SCHEMES) { + if(url.startsWith(scheme)) { + int hostIdx = scheme.length(); + int portIdx = url.indexOf(PORT_SEPARATOR, hostIdx + 1); + int pathIdx = url.indexOf(PATH_START, hostIdx + 1); + if(portIdx == -1 && pathIdx == -1) { + return url.substring(hostIdx); + } + if(portIdx == -1) { + return url.substring(hostIdx,pathIdx); + } + if(pathIdx == -1) { + return url.substring(hostIdx,portIdx); + } + if(pathIdx > portIdx) { + return url.substring(hostIdx,portIdx); + } else { + return url.substring(hostIdx,pathIdx); + } + } + } + return url; + } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:27:49 UTC (rev 2360) +++ trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/util/url/UrlOperationsTest.java 2008-07-01 23:29:31 UTC (rev 2361) @@ -27,5 +27,42 @@ } else { assertTrue("String("+s+") is not an Authority",want == got); } - } + } + public void testUrlToHost() { + assertEquals("foo.com",UrlOperations.urlToHost("dns:foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/")); + + 
assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com:120/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com:180/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com:190/path:/")); + + assertEquals("foo.com",UrlOperations.urlToHost("http://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("https://foo.com/path:/")); + assertEquals("foo.com",UrlOperations.urlToHost("ftp://foo.com/path:/")); + + + } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <bra...@us...> - 2008-07-08 06:06:33
|
Revision: 2419 http://archive-access.svn.sourceforge.net/archive-access/?rev=2419&view=rev Author: bradtofel Date: 2008-07-07 23:06:41 -0700 (Mon, 07 Jul 2008) Log Message: ----------- REFACTOR: WaybackRequest now has get/set for all standard request values. Modified Paths: -------------- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/UIQueryResults.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/FormRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/requestparser/OpenSearchRequestParser.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/LocalResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/NutchResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/RemoteResourceIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndex.java trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/webapp/AccessPoint.java trunk/archive-access/projects/wayback/wayback-core/src/test/java/org/archive/wayback/resourceindex/distributed/AlphaPartitionedIndexTest.java Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 
=================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/ArchivalUrlReplayDispatcher.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -75,7 +75,7 @@ // if the result is not for the exact date requested, redirect to the // exact date. some capture dates are not 14 digits, only compare as // many digits as are in the result date: - String reqDateStr = wbRequest.get(WaybackRequest.REQUEST_DATE); + String reqDateStr = wbRequest.getReplayTimestamp(); String resDateStr = result.getCaptureTimestamp(); if(!resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { return redirect; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDatePrefixQueryRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -27,7 +27,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.requestparser.PathRequestParser; @@ -66,15 +65,10 @@ startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - wbRequest.put(WaybackRequest.REQUEST_START_DATE,startDate); - 
wbRequest.put(WaybackRequest.REQUEST_END_DATE,endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_QUERY); - try { - wbRequest.setRequestUrl(urlStr); - } catch (URIException e) { - wbRequest = null; - } + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); + wbRequest.setCaptureQueryRequest(); + wbRequest.setRequestUrl(urlStr); } return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathDateRangeQueryRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -27,7 +27,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.requestparser.PathRequestParser; @@ -61,15 +60,10 @@ String startDate = Timestamp.parseBefore(startDateStr).getDateStr(); String endDate = Timestamp.parseAfter(endDateStr).getDateStr(); - wbRequest.put(WaybackRequest.REQUEST_START_DATE,startDate); - wbRequest.put(WaybackRequest.REQUEST_END_DATE,endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_QUERY); - try { - wbRequest.setRequestUrl(urlStr); - } catch (URIException e) { - wbRequest = null; - } + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); + wbRequest.setCaptureQueryRequest(); + wbRequest.setRequestUrl(urlStr); } return wbRequest; } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDatePrefixQueryRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -27,7 +27,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.requestparser.PathRequestParser; @@ -66,17 +65,11 @@ endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - wbRequest.put(WaybackRequest.REQUEST_START_DATE, - startDate); - wbRequest.put(WaybackRequest.REQUEST_END_DATE,endDate); + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_PREFIX_QUERY); - try { - wbRequest.setRequestUrl(urlStr); - } catch (URIException e) { - wbRequest = null; - } + wbRequest.setUrlQueryRequest(); + wbRequest.setRequestUrl(urlStr); } return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/PathPrefixDateRangeQueryRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -27,7 +27,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.requestparser.PathRequestParser; @@ -58,17 +57,11 @@ String urlStr = matcher.group(3); String startDate = Timestamp.parseBefore(startDateStr).getDateStr(); String endDate = Timestamp.parseAfter(endDateStr).getDateStr(); - wbRequest.put(WaybackRequest.REQUEST_START_DATE, - startDate); - wbRequest.put(WaybackRequest.REQUEST_END_DATE,endDate); + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_PREFIX_QUERY); - try { - wbRequest.setRequestUrl(urlStr); - } catch (URIException e) { - wbRequest = null; - } + wbRequest.setUrlQueryRequest(); + wbRequest.setRequestUrl(urlStr); } return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/archivalurl/requestparser/ReplayRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -24,12 +24,9 @@ */ package org.archive.wayback.archivalurl.requestparser; -import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; - -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import 
org.archive.wayback.core.WaybackRequest; import org.archive.wayback.requestparser.PathRequestParser; @@ -42,8 +39,6 @@ * @version $Date$, $Revision$ */ public class ReplayRequestParser extends PathRequestParser { - private static final Logger LOGGER = Logger.getLogger( - ReplayRequestParser.class.getName()); /** * Regex which parses Archival URL replay requests into timestamp + url */ @@ -65,8 +60,6 @@ // based upon amount given (2001 => 20010101... - 20011231...) // AND assume the user asked for the LATEST possible date // within that range... - // - // ...don't ask me, I just work here. String startDate = null; String endDate = null; @@ -76,36 +69,22 @@ } else { // classic behavior: - // startDate = Timestamp.parseBefore(dateStr).getDateStr(); - // endDate = Timestamp.parseAfter(dateStr).getDateStr(); - // dateStr = endDate; + startDate = Timestamp.parseBefore(dateStr).getDateStr(); + endDate = Timestamp.parseAfter(dateStr).getDateStr(); + dateStr = endDate; - // "better" behavior: - startDate = getEarliestTimestamp(); - endDate = getLatestTimestamp(); - dateStr = Timestamp.parseAfter(dateStr).getDateStr(); + // maybe "better" behavior: +// startDate = getEarliestTimestamp(); +// endDate = getLatestTimestamp(); +// dateStr = Timestamp.parseAfter(dateStr).getDateStr(); } - wbRequest.put(WaybackRequest.REQUEST_DATE, dateStr); - wbRequest.put(WaybackRequest.REQUEST_START_DATE, startDate); - wbRequest.put(WaybackRequest.REQUEST_END_DATE, endDate); + wbRequest.setReplayTimestamp(dateStr); + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_REPLAY_QUERY); - - try { -// String wbPrefix = wbRequest.getDefaultWaybackPrefix(); -// if (urlStr.startsWith(wbPrefix)) { -// wbRequest.setBetterRequestURI(urlStr); -// } - wbRequest.setRequestUrl(urlStr); - } catch (URIException e) { - if(urlStr != null) { - LOGGER.severe("Failed parse of url(" + urlStr + ")"); - } - 
e.printStackTrace(); - wbRequest = null; - } + wbRequest.setReplayRequest(); + wbRequest.setRequestUrl(urlStr); } return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/HTTPAuthBooleanOperator.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -11,7 +11,7 @@ if(allowedUsers == null) { return false; } - String currentUser = value.get(WaybackRequest.REQUEST_REMOTE_USER); + String currentUser = value.getRemoteUser(); if(currentUser == null) { return false; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/authenticationcontrol/IPMatchesBooleanOperator.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -33,7 +33,7 @@ if(allowedRanges == null) { return false; } - String ipString = value.get(WaybackRequest.REQUEST_REMOTE_ADDRESS); + String ipString = value.getRemoteIPAddress(); if(ipString == null) { return false; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResults.java =================================================================== --- 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResults.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/CaptureSearchResults.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -109,16 +109,12 @@ CaptureSearchResult closest = null; long closestDistance = 0; CaptureSearchResult cur = null; - String anchorDate = wbRequest.get(WaybackRequest.REQUEST_ANCHOR_DATE); + String anchorDate = wbRequest.getAnchorTimestamp(); long maxWindow = -1; - long wantTime = Timestamp.parseBefore(wbRequest - .get(WaybackRequest.REQUEST_EXACT_DATE)).getDate().getTime(); + long wantTime = wbRequest.getReplayDate().getTime(); if(anchorDate != null) { wantTime = Timestamp.parseBefore(anchorDate).getDate().getTime(); - String anchorWindow = wbRequest.get(WaybackRequest.REQUEST_ANCHOR_WINDOW); - if(anchorWindow != null) { - maxWindow = Long.parseLong(anchorWindow); - } + maxWindow = wbRequest.getAnchorWindow(); } Iterator<CaptureSearchResult> itr = results.iterator(); @@ -132,7 +128,7 @@ closestDistance = curDistance; } } - if(err && (maxWindow != -1)) { + if(err && (maxWindow > 0)) { if(closestDistance > maxWindow) { throw new AnchorWindowTooSmallException("Closest is " + closestDistance + " seconds away, Window is " + Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/UIResults.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -28,7 +28,6 @@ import javax.servlet.http.HttpServletRequest; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.ResultURIConverter; import 
org.archive.wayback.util.StringFormatter; import org.archive.wayback.webapp.AccessPoint; @@ -72,14 +71,8 @@ public String makeCaptureQueryUrl(String url) { WaybackRequest newWBR = wbRequest.clone(); - newWBR.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_QUERY); - try { - newWBR.setRequestUrl(url); - } catch (URIException e) { - // should not happen... - e.printStackTrace(); - } + newWBR.setCaptureQueryRequest(); + newWBR.setRequestUrl(url); return newWBR.getContextPrefix() + "query?" + newWBR.getQueryArguments(1); } @@ -214,7 +207,7 @@ */ public String getContextConfig(final String configName) { String configValue = null; - AccessPoint context = getWbRequest().getContext(); + AccessPoint context = getWbRequest().getAccessPoint(); if(context != null) { Properties configs = context.getConfigs(); if(configs != null) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/core/WaybackRequest.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -23,6 +23,7 @@ package org.archive.wayback.core; +import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.Locale; @@ -34,9 +35,6 @@ import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletRequest; -import org.apache.commons.httpclient.URIException; -import org.archive.net.UURI; -import org.archive.net.UURIFactory; import org.archive.wayback.requestparser.OpenSearchRequestParser; import org.archive.wayback.util.ObjectFilter; import org.archive.wayback.util.StringFormatter; @@ -51,185 +49,315 @@ */ public class WaybackRequest { - public final static String REQUEST_ANCHOR_DATE = "request.anchordate"; - public 
final static String REQUEST_ANCHOR_WINDOW = "request.anchorwindow"; - + /** + * indicates the number of requests per page, only makes sense for + * Capture/Url queries. + */ private int resultsPerPage = 10; - + /** + * indicates the specific page of results to show, for paginated requests, + * only makes sense for Capture/Url queries. + */ private int pageNum = 1; - + /** + * absolute URL prefix to the AccessPoint which received this request + */ private String contextPrefix = null; + /** + * absolute URL prefix to the Server(webapp) which received this request + */ private String serverPrefix = null; - private AccessPoint context = null; + /** + * reference to the AccessPoint which received this request. + */ + private AccessPoint accessPoint = null; + /** + * custom CaptureSearchResult Filter to use for this specific request. Can + * be null, and is sometimes useful to allow an AccessPoint to have specific + * and possibly variable filters. + */ private ObjectFilter<CaptureSearchResult> exclusionFilter = null; - - private HashMap<String,String> filters = new HashMap<String,String>(); - - private StringFormatter formatter = null; /** - * Request: Authorization Type: "BASIC", "SSL", or "" if none. + * StringFormatter object set up with the users specific Locale, and the + * Wayback UI ResourceBundle prepared for use, simplifying UI generation + * somewhat. */ - public static final String REQUEST_AUTH_TYPE = "requestauthtype"; + private StringFormatter formatter = null; /** - * Request: Wayback Context: the string context used in the request, - * if applicable. + * generic String-to-String map of various request filters and type + * information. See constants below for keys & values. 
*/ - public static final String REQUEST_WAYBACK_CONTEXT = "waybackcontext"; + private HashMap<String,String> filters = new HashMap<String,String>(); + + + /* + * ********************** + * REQUEST TYPE CONSTANTS + * ********************** + */ /** - * Request: Wayback Port: the port the remote user connected to for this - * request. + * specifies the TYPE of this particular request. One of: + * *) REQUEST_REPLAY_QUERY + * *) REQUEST_CAPTURE_QUERY + * *) REQUEST_URL_QUERY */ - public static final String REQUEST_WAYBACK_PORT = "waybackport"; + public static final String REQUEST_TYPE = "type"; /** - * Request: Wayback Hostname: the string "Host:" HTTP header + * REQUEST_TYPE option indicating a request for Replay of the Resource + * matching REQUEST_URL closest in time to REQUEST_DATE */ - public static final String REQUEST_WAYBACK_HOSTNAME = "waybackhostname"; + public static final String REQUEST_REPLAY_QUERY = "replay"; /** - * Request: Remote Address, string IP address: "127.0.0.1" + * REQUEST_TYPE option indicating a query against the ResourceIndex for + * captures of URLs matching the REQUEST_URL */ - public static final String REQUEST_REMOTE_ADDRESS = "remoteaddress"; + public static final String REQUEST_CAPTURE_QUERY = "capturequery"; /** - * Request: auto resolution (TimeLine mode) + * REQUEST_TYPE option indicating a query against the ResourceIndex for + * summaries of URLs prefixed with the REQUEST_URL */ - public static final String REQUEST_RESOLUTION_AUTO = "auto"; + public static final String REQUEST_URL_QUERY = "urlquery"; + /* + * ********************** + * /REQUEST TYPE CONSTANTS + * ********************** + */ + + + /* + * ****************** + * URL/DATE CONSTANTS + * ****************** + */ /** - * Request: year resolution (TimeLine mode) + * GUARANTEED PRESENT: Original(RAW) URL or URL prefix requested, before any + * cleanup/fixing */ - public static final String REQUEST_RESOLUTION_YEARS = "years"; + public static final String REQUEST_URL = 
"url"; + /** - * Request: two-month resolution (TimeLine mode) + * Cleaned up version of original requested URL or URL prefix, as performed + * by UURIFactory. */ - public static final String REQUEST_RESOLUTION_TWO_MONTHS = "twomonths"; +// public static final String REQUEST_URL_CLEANED = "cleanedurl"; + /** - * Request: month resolution (TimeLine mode) + * GUARANTEED PRESENT: omit results after this 14-digit String timestamp. + * Possibly created from: + * 1) specified directly in request + * 2) a partial REQUEST_DATE (latest possible given a prefix) + * 3) RequestParser default + * 4) 14-digit representation of the moment the request was received. */ - public static final String REQUEST_RESOLUTION_MONTHS = "months"; + public static final String REQUEST_END_DATE = "enddate"; + /** - * Request: day resolution (TimeLine mode) + * GUARANTEED PRESENT: omit results before this 14-digit String timestamp. + * Possibly created from: + * 1) specified directly in request + * 2) a partial REQUEST_DATE (earliest possible given a prefix) + * 3) RequestParser default + * 4) 14-digit representation of midnight Jan 1, 1996. */ - public static final String REQUEST_RESOLUTION_DAYS = "days"; + public static final String REQUEST_START_DATE = "startdate"; + /** - * Request: hour resolution (TimeLine mode) + * GUARANTEED PRESENT for Replay requests only, no meaning for Query + * requests. + * Original (RAW/possibly partial) 14-digit timestamp of date requested for + * Replay */ - public static final String REQUEST_RESOLUTION_HOURS = "hours"; + public static final String REQUEST_DATE = "date"; + /** - * Request: replay actual document or metadata for document: "yes" means - * replay metadata only, not the actual document: (TimeLine mode) */ - public static final String REQUEST_META_MODE = "metamode"; + * GUARANTEED PRESENT for Replay requests only, no meaning for Query + * requests. 
+ * Cleaned up version of original REQUEST_DATE, padded to 14 digits assuming + * the */ - public static final String REQUEST_META_MODE = "metamode"; + public static final String REQUEST_EXACT_DATE = "exactdate"; + /** - * Request: resolution of results to be displayed: (TimeLine mode) + * Indicates user only wants results that exactly match the hostname within + * REQUEST_URL -- no canonicalization. */ - public static final String REQUEST_RESOLUTION = "resolution"; + public static final String REQUEST_EXACT_HOST_ONLY = "requestexacthost"; + /** - * Request: closest type request + * indicates positive value for any request boolean flag. */ - public static final String REQUEST_CLOSEST_QUERY = "urlclosestquery"; + public static final String REQUEST_YES = "yes"; + /** - * Request: replay type request + * Replay-Only: indicates the date to tend towards when computing closest + * matches within time. Used to prevent "time drift" while surfing from a + * particular date. */ - public static final String REQUEST_REPLAY_QUERY = "replay"; + public final static String REQUEST_ANCHOR_DATE = "request.anchordate"; + /** - * Request: urlprefixquery type request + * Replay-Only: String representation of number of seconds. Used only in + * conjunction with REQUEST_ANCHOR_DATE, and indicates that documents more + * than this many seconds should not be shown in a replay session. Useful + * for QA purposes, to ensure that all content within a replay session was + * crawled near a particular point, the REQUEST_ANCHOR_DATE. 
*/ - public static final String REQUEST_URL_PREFIX_QUERY = "urlprefixquery"; + public final static String REQUEST_ANCHOR_WINDOW = "request.anchorwindow"; + /* + * ****************** + * /URL/DATE CONSTANTS + * ****************** + */ + + + /* + * ******************************* + * OUTPUT TYPE CONSTANTS + * ******************************* + */ /** - * Request: urlquery type request + * Request: replay actual document or metadata for document: "yes" means + * replay metadata only, not the actual document: (TimeLine mode) */ - public static final String REQUEST_URL_QUERY = "urlquery"; + public static final String REQUEST_META_MODE = "metamode"; /** * Request: xml data requested */ public static final String REQUEST_XML_DATA = "xmldata"; + /* + * ******************************* + * /OUTPUT TYPE CONSTANTS + * ******************************* + */ + + /* + * ******************************* + * CONTEXT & ACCESSPOINT CONSTANTS + * ******************************* + */ /** - * Request: defines type - urlquery, urlprefixquery, or replay + * the string (webapp) context that received this request */ - public static final String REQUEST_TYPE = "type"; + public static final String REQUEST_WAYBACK_CONTEXT = "waybackcontext"; /** - * Request: URL of referrer, if supplied, or "" if not + * the port the remote user connected to for this request */ + public static final String REQUEST_WAYBACK_PORT = "waybackport"; + /* + * ******************************* + * /CONTEXT & ACCESSPOINT CONSTANTS + * ******************************* + */ + + /* + * ***************************** + * HTTP HEADER/REQUEST CONSTANTS + * ***************************** + */ + /** + * incoming requests HTTP "Host:" header, or null + */ + public static final String REQUEST_WAYBACK_HOSTNAME = "waybackhostname"; + /** + * incoming requests HTTP "Referer:" header, or null + */ public static final String REQUEST_REFERER_URL = "refererurl"; /** - * Request: Original URL or URL prefix requested. 
- * This version differs from @{link {@link REQUEST_URL} in that its - * the URL before it was passed via the UURIFactory cleanup. + * Remote Address that connected to this webapp to create the request + * string IP address: "127.0.0.1" */ - public static final String REQUEST_URL_CLEANED = "cleanedurl"; + public static final String REQUEST_REMOTE_ADDRESS = "remoteaddress"; /** - * Request: URL or URL prefix requested + * Remote User or null if the request did not contain auth info. + * see HttpServletRequest.getRemoteUser() */ - public static final String REQUEST_URL = "url"; + public static final String REQUEST_REMOTE_USER = "requestremoteuser"; + /** - * Request: (replay) find closest result to this 14-digit timestamp + * User Locale name: Best Guess at users requested locale. + * see ServletRequest.getLocale().getDisplayLanguage() */ - public static final String REQUEST_EXACT_DATE = "exactdate"; + public static final String REQUEST_LOCALE_LANG = "requestlocalelang"; /** - * Request: filter results after this 14-digit timestamp + * Authorization Type: "BASIC", "SSL", or null if none. 
+ * see HttpServletRequest.getAuthType() */ - public static final String REQUEST_END_DATE = "enddate"; + public static final String REQUEST_AUTH_TYPE = "requestauthtype"; + /* + * *********************** + * /HTTP HEADER/REQUEST CONSTANTS + * *********************** + */ + + /* + * *********************** + * TIMELINE MODE CONSTANTS + * *********************** + */ /** - * Request: filter results before this 14-digit timestamp + * resolution of results to be displayed: (TimeLine mode) */ - public static final String REQUEST_START_DATE = "startdate"; + public static final String REQUEST_RESOLUTION = "resolution"; /** - * Request: (query) filter results to those prefixed with this (possibly - * partial) 14-digit timestamp + * auto resolution (TimeLine mode) */ - public static final String REQUEST_DATE = "date"; + public static final String REQUEST_RESOLUTION_AUTO = "auto"; /** - * Request: Remote User or "" if the request did not contain auth info. + * year resolution (TimeLine mode) */ - public static final String REQUEST_REMOTE_USER = "requestremoteuser"; + public static final String REQUEST_RESOLUTION_YEARS = "years"; /** - * Request: Best Guess at users requested locale. + * two-month resolution (TimeLine mode) */ - public static final String REQUEST_LOCALE_LANG = "requestlocalelang"; + public static final String REQUEST_RESOLUTION_TWO_MONTHS = "twomonths"; /** - * Request: Indicates user only wants results that exactly match the - * requested hostname -- no canonicalization. + * month resolution (TimeLine mode) */ - public static final String REQUEST_EXACT_HOST_ONLY = "requestexacthost"; + public static final String REQUEST_RESOLUTION_MONTHS = "months"; /** - * Request: indicates positive value for any request boolean flag. 
+ * day resolution (TimeLine mode) */ - public static final String REQUEST_YES = "yes"; + public static final String REQUEST_RESOLUTION_DAYS = "days"; + /** + * hour resolution (TimeLine mode) + */ + public static final String REQUEST_RESOLUTION_HOURS = "hours"; + /* + * *********************** + * /TIMELINE MODE CONSTANTS + * *********************** + */ + private static String UI_RESOURCE_BUNDLE_NAME = "WaybackUI"; + /** + * set of filter keys that are not forwarded to subsequent paginated + * requests. + */ private final static String standardHeaders[] = { - WaybackRequest.REQUEST_REFERER_URL, - WaybackRequest.REQUEST_REMOTE_ADDRESS, - WaybackRequest.REQUEST_WAYBACK_HOSTNAME, - WaybackRequest.REQUEST_WAYBACK_PORT, - WaybackRequest.REQUEST_WAYBACK_CONTEXT, - WaybackRequest.REQUEST_AUTH_TYPE, - WaybackRequest.REQUEST_REMOTE_USER, - WaybackRequest.REQUEST_LOCALE_LANG }; + REQUEST_REFERER_URL, + REQUEST_REMOTE_ADDRESS, + REQUEST_WAYBACK_HOSTNAME, + REQUEST_WAYBACK_PORT, + REQUEST_WAYBACK_CONTEXT, + REQUEST_AUTH_TYPE, + REQUEST_REMOTE_USER, + REQUEST_LOCALE_LANG }; /** - * Constructor, possibly/probably this should BE a Properties, instead of - * HAVEing a Properties... + * @return Returns the resultsPerPage. */ - public WaybackRequest() { - super(); + public int getResultsPerPage() { + return resultsPerPage; } /** - * @return true if REQUEST_TYPE is set, and is set to REQUEST_REPLAY_QUERY + * @param resultsPerPage + * The resultsPerPage to set. */ - public boolean isReplayRequest() { - String type = get(WaybackRequest.REQUEST_TYPE); - if(type != null && type.equals(WaybackRequest.REQUEST_REPLAY_QUERY)) { - return true; - } - return false; + public void setResultsPerPage(int resultsPerPage) { + this.resultsPerPage = resultsPerPage; } - /** - * @return true if true if REQUEST_TYPE is not set, or is set to a value - * other than REQUEST_REPLAY_QUERY - */ - public boolean isQueryRequest() { - return !isReplayRequest(); - } /** * @return Returns the pageNum. 
@@ -247,34 +375,82 @@ } /** - * @return Returns the resultsPerPage. + * @param prefix */ - public int getResultsPerPage() { - return resultsPerPage; + public void setContextPrefix(String prefix) { + contextPrefix = prefix; } /** - * @param resultsPerPage - * The resultsPerPage to set. + * Construct an absolute URL that points to the root of the context that + * received the request, including a trailing "/". + * + * @return String absolute URL pointing to the Context root where the + * request was received. */ - public void setResultsPerPage(int resultsPerPage) { - this.resultsPerPage = resultsPerPage; + public String getContextPrefix() { + if(contextPrefix == null) { + return ""; + } + return contextPrefix; } /** - * @param key - * @return boolean, true if the request contains key 'key' + * @param prefix */ - public boolean containsKey(String key) { - return filters.containsKey(key); + public void setServerPrefix(String prefix) { + serverPrefix = prefix; } /** + * @param prefix + * @return an absolute String URL that will point to the root of the + * server that is handling the request. 
+ */ + public String getServerPrefix() { + if(serverPrefix == null) { + return ""; + } + return serverPrefix; + } + /** + * @return the accessPoint + */ + public AccessPoint getAccessPoint() { + return accessPoint; + } + + /** + * @param accessPoint the accessPoint to set + */ + public void setAccessPoint(AccessPoint accessPoint) { + this.accessPoint = accessPoint; + } + + public ObjectFilter<CaptureSearchResult> getExclusionFilter() { + return exclusionFilter; + } + + public void setExclusionFilter(ObjectFilter<CaptureSearchResult> exclusionFilter) { + this.exclusionFilter = exclusionFilter; + } + + /** + * @return StringFormatter based on user request info + */ + public StringFormatter getFormatter() { + if(formatter == null) { + setLocale(Locale.getAvailableLocales()[0]); + } + return formatter; + } + + /** * @param key * @return String value for key 'key', or null if no value exists */ public String get(String key) { - return (String) filters.get(key); + return filters.get(key); } /** @@ -284,14 +460,238 @@ public void put(String key, String value) { filters.put(key, value); } + public void remove(String key) { + filters.remove(key); + } - private String emptyIfNull(String arg) { - if (arg == null) { - return ""; + private void setBoolean(String key, boolean value) { + if(value) { + put(key,REQUEST_YES); + } else { + remove(key); } - return arg; + } + private boolean getBoolean(String key) { + String value = get(key); + return(value == null || !value.equals(REQUEST_YES)); + } + /** + * @param key + * @return boolean, true if the request contains key 'key' + * @deprecated + */ + public boolean containsKey(String key) { + return filters.containsKey(key); } + + private void putUnlessNull(String key, String val) { + if (val != null) { + put(key,val); + } + } + + private boolean isRequestType(String requestType) { + String type = get(REQUEST_TYPE); + if(type != null && type.equals(requestType)) { + return true; + } + return false; + } + + /** + * @return true if 
this is a Replay request + */ + public boolean isReplayRequest() { + return isRequestType(REQUEST_REPLAY_QUERY); + } + /** + * marks this request as a Replay request + */ + public void setReplayRequest() { + put(REQUEST_TYPE,REQUEST_REPLAY_QUERY); + } + /** + * @return true if this is a Capture Query request + */ + public boolean isCaptureQueryRequest() { + return isRequestType(REQUEST_CAPTURE_QUERY); + } + /** + * marks this request as a Capture Query request + */ + public void setCaptureQueryRequest() { + put(REQUEST_TYPE,REQUEST_CAPTURE_QUERY); + } + /** + * @return true if this is a Url Query request + */ + public boolean isUrlQueryRequest() { + return isRequestType(REQUEST_URL_QUERY); + } + /** + * marks this request as a Url Query request + */ + public void setUrlQueryRequest() { + put(REQUEST_TYPE,REQUEST_URL_QUERY); + } + + public String getRequestUrl() { + return get(REQUEST_URL); + } + /** + * Set the request URL. + * @param urlStr Request URL. + */ + public void setRequestUrl(String urlStr) { + // TODO: fix this to use other schemes + if (!urlStr.startsWith("http://")) { + if(urlStr.startsWith("http:/")) { + urlStr = "http://" + urlStr.substring(6); + } else { + urlStr = "http://" + urlStr; + } + } +// UURI requestURI = UURIFactory.getInstance(urlStr); +// put(REQUEST_URL_CLEANED, requestURI.toString()); + put(REQUEST_URL, urlStr); + } + public String getEndTimestamp() { + return get(REQUEST_END_DATE); + } + public Date getEndDate() { + return Timestamp.parseAfter(get(REQUEST_END_DATE)).getDate(); + } + public void setEndDate(Date date) { + put(REQUEST_END_DATE,new Timestamp(date).getDateStr()); + } + public void setEndTimestamp(String timestamp) { + put(REQUEST_END_DATE,timestamp); + } + + public String getStartTimestamp() { + return get(REQUEST_START_DATE); + } + public Date getStartDate() { + return Timestamp.parseBefore(get(REQUEST_START_DATE)).getDate(); + } + public void setStartDate(Date date) { + put(REQUEST_START_DATE,new Timestamp(date).getDateStr()); + 
} + public void setStartTimestamp(String timestamp) { + put(REQUEST_START_DATE,timestamp); + } + + public String getReplayTimestamp() { + return get(REQUEST_DATE); + } + public Date getReplayDate() { + return Timestamp.parseAfter(get(REQUEST_DATE)).getDate(); + } + public void setReplayDate(Date date) { + put(REQUEST_DATE,new Timestamp(date).getDateStr()); + } + public void setReplayTimestamp(String timestamp) { + put(REQUEST_DATE,timestamp); + } + + public void setExactHost(boolean isExactHost) { + setBoolean(REQUEST_EXACT_HOST_ONLY,isExactHost); + } + public boolean isExactHost() { + return getBoolean(REQUEST_EXACT_HOST_ONLY); + } + + public String getAnchorTimestamp() { + return get(REQUEST_ANCHOR_DATE); + } + public Date getAnchorDate() { + return Timestamp.parseAfter(get(REQUEST_ANCHOR_DATE)).getDate(); + } + public void setAnchorDate(Date date) { + put(REQUEST_ANCHOR_DATE,new Timestamp(date).getDateStr()); + } + public void setAnchorTimestamp(String timestamp) { + put(REQUEST_ANCHOR_DATE,timestamp); + } + + public long getAnchorWindow() { + String seconds = get(REQUEST_ANCHOR_WINDOW); + if(seconds == null) { + return 0; + } + return Long.parseLong(seconds); + } + public void setAnchorWindow(long seconds) { + put(REQUEST_ANCHOR_WINDOW,String.valueOf(seconds));; + } + + public void setMetaMode(boolean isMetaMode) { + setBoolean(REQUEST_META_MODE,isMetaMode); + } + public boolean isMetaMode() { + return getBoolean(REQUEST_META_MODE); + } + + public void setXMLMode(boolean isXMLMode) { + setBoolean(REQUEST_XML_DATA,isXMLMode); + } + public boolean isXMLMode() { + return getBoolean(REQUEST_XML_DATA); + } + + public String getWaybackContext() { + return get(REQUEST_WAYBACK_CONTEXT); + } + public int getWaybackPort() { + String port = get(REQUEST_WAYBACK_PORT); + if(port == null) { + return 0; + } + return Integer.parseInt(port); + } + + public String getWaybackHostname() { + return get(REQUEST_WAYBACK_HOSTNAME); + } + public String getRefererUrl() { + return 
get(REQUEST_REFERER_URL); + } + public String getRemoteIPAddress() { + return get(REQUEST_REMOTE_ADDRESS); + } + public String getRemoteUser() { + return get(REQUEST_REMOTE_USER); + } + public String getLocaleLanguage() { + return get(REQUEST_LOCALE_LANG); + } + public String getAuthType() { + return get(REQUEST_AUTH_TYPE); + } + + public String getTimelineResolution() { + return get(REQUEST_RESOLUTION); + } + public void setTimelineAutoResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_AUTO); + } + public void setTimelineYearResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_YEARS); + } + public void setTimelineTwoMonthResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_TWO_MONTHS); + } + public void setTimelineMonthResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_MONTHS); + } + public void setTimelineDayResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_DAYS); + } + public void setTimelineHourResolution() { + put(REQUEST_RESOLUTION,REQUEST_RESOLUTION_HOURS); + } + /** * Set the Locale for the request, which impacts UI Strings * @param l @@ -301,14 +701,6 @@ formatter = new StringFormatter(b,l); } - private String getUserLocale(HttpServletRequest httpRequest) { - Locale l = httpRequest.getLocale(); - ResourceBundle b = ResourceBundle.getBundle(UI_RESOURCE_BUNDLE_NAME, - httpRequest.getLocale()); - formatter = new StringFormatter(b,l); - return emptyIfNull(httpRequest.getLocale().getDisplayLanguage()); - } - /** * extract REFERER, remote IP and authorization information from the * HttpServletRequest @@ -316,23 +708,22 @@ * @param httpRequest */ private void extractHttpRequestInfo(HttpServletRequest httpRequest) { - // attempt to get the HTTP referer if present.. 
- put(WaybackRequest.REQUEST_REFERER_URL, emptyIfNull(httpRequest - .getHeader("REFERER"))); - put(WaybackRequest.REQUEST_REMOTE_ADDRESS, emptyIfNull(httpRequest - .getRemoteAddr())); - put(WaybackRequest.REQUEST_WAYBACK_HOSTNAME, emptyIfNull(httpRequest - .getLocalName())); - put(WaybackRequest.REQUEST_WAYBACK_PORT, String.valueOf(httpRequest - .getLocalPort())); - put(WaybackRequest.REQUEST_WAYBACK_CONTEXT, emptyIfNull(httpRequest - .getContextPath())); - put(WaybackRequest.REQUEST_AUTH_TYPE, emptyIfNull(httpRequest - .getAuthType())); - put(WaybackRequest.REQUEST_REMOTE_USER, emptyIfNull(httpRequest - .getRemoteUser())); - put(WaybackRequest.REQUEST_LOCALE_LANG,getUserLocale(httpRequest)); + + putUnlessNull(REQUEST_REFERER_URL, httpRequest.getHeader("REFERER")); + putUnlessNull(REQUEST_REMOTE_ADDRESS, httpRequest.getRemoteAddr()); + putUnlessNull(REQUEST_WAYBACK_HOSTNAME, httpRequest.getLocalName()); + putUnlessNull(REQUEST_AUTH_TYPE, httpRequest.getAuthType()); + putUnlessNull(REQUEST_REMOTE_USER, httpRequest.getRemoteUser()); + putUnlessNull(REQUEST_WAYBACK_PORT, + String.valueOf(httpRequest.getLocalPort())); + putUnlessNull(REQUEST_WAYBACK_CONTEXT, httpRequest.getContextPath()); + Locale l = httpRequest.getLocale(); + ResourceBundle b = ResourceBundle.getBundle(UI_RESOURCE_BUNDLE_NAME, + httpRequest.getLocale()); + formatter = new StringFormatter(b,l); + putUnlessNull(REQUEST_LOCALE_LANG,l.getDisplayLanguage()); + Cookie[] cookies = httpRequest.getCookies(); if(cookies != null) { for(Cookie cookie : cookies) { @@ -342,46 +733,6 @@ } /** - * @param prefix - */ - public void setServerPrefix(String prefix) { - serverPrefix = prefix; - } - - /** - * @param prefix - * @return an absolute String URL that will point to the root of the - * server that is handling the request. 
- */ - public String getServerPrefix() { - if(serverPrefix == null) { - return ""; - } - return serverPrefix; - } - - - /** - * @param prefix - */ - public void setContextPrefix(String prefix) { - contextPrefix = prefix; - } - /** - * Construct an absolute URL that points to the root of the context that - * recieved the request, including a trailing "/". - * - * @return String absolute URL pointing to the Context root where the - * request was revieved. - */ - public String getContextPrefix() { - if(contextPrefix == null) { - return ""; - } - return contextPrefix; - } - - /** * attempt to fixup this WaybackRequest, mostly with respect to dates: if * only "date" was specified, infer start and end dates from it. Also grab * useful info from the HttpServletRequest, cookies, remote address, etc. @@ -390,32 +741,32 @@ */ public void fixup(HttpServletRequest httpRequest) { extractHttpRequestInfo(httpRequest); - String startDate = get(WaybackRequest.REQUEST_START_DATE); - String endDate = get(WaybackRequest.REQUEST_END_DATE); - String exactDate = get(WaybackRequest.REQUEST_EXACT_DATE); - String partialDate = get(WaybackRequest.REQUEST_DATE); + String startDate = get(REQUEST_START_DATE); + String endDate = get(REQUEST_END_DATE); + String exactDate = get(REQUEST_EXACT_DATE); + String partialDate = get(REQUEST_DATE); if (partialDate == null) { partialDate = ""; } if (startDate == null || startDate.length() == 0) { - put(WaybackRequest.REQUEST_START_DATE, Timestamp + put(REQUEST_START_DATE, Timestamp .padStartDateStr(partialDate)); } else if (startDate.length() < 14) { - put(WaybackRequest.REQUEST_START_DATE, Timestamp + put(REQUEST_START_DATE, Timestamp .padStartDateStr(startDate)); } if (endDate == null || endDate.length() == 0) { - put(WaybackRequest.REQUEST_END_DATE, Timestamp + put(REQUEST_END_DATE, Timestamp .padEndDateStr(partialDate)); } else if (endDate.length() < 14) { - put(WaybackRequest.REQUEST_END_DATE, Timestamp + put(REQUEST_END_DATE, Timestamp 
.padEndDateStr(endDate)); } if (exactDate == null || exactDate.length() == 0) { - put(WaybackRequest.REQUEST_EXACT_DATE, Timestamp + put(REQUEST_EXACT_DATE, Timestamp .padEndDateStr(partialDate)); } else if (exactDate.length() < 14) { - put(WaybackRequest.REQUEST_EXACT_DATE, Timestamp + put(REQUEST_EXACT_DATE, Timestamp .padEndDateStr(exactDate)); } } @@ -469,35 +820,6 @@ + OpenSearchRequestParser.START_PAGE + "=" + pageNum; } - /** - * Set the request URL. - * Also populates request url cleaned. - * @param urlStr Request URL. - * @throws URIException - */ - public void setRequestUrl(String urlStr) throws URIException { - if (!urlStr.startsWith("http://")) { - if(urlStr.startsWith("http:/")) { - urlStr = "http://" + urlStr.substring(6); - } else { - urlStr = "http://" + urlStr; - } - } - // If its not http, next line throws exception. TODO: Fix. - UURI requestURI = UURIFactory.getInstance(urlStr); - put(WaybackRequest.REQUEST_URL_CLEANED, requestURI.toString()); - put(WaybackRequest.REQUEST_URL, urlStr); - } - - /** - * @return StringFormatter based on user request info - */ - public StringFormatter getFormatter() { - if(formatter == null) { - setLocale(Locale.getAvailableLocales()[0]); - } - return formatter; - } public WaybackRequest clone() { WaybackRequest wbRequest = new WaybackRequest(); @@ -521,27 +843,10 @@ } /** - * @return the context + * + * @return + * @deprecated */ - public AccessPoint getContext() { - return context; - } - - /** - * @param context the context to set - */ - public void setContext(AccessPoint context) { - this.context = context; - } - - public ObjectFilter<CaptureSearchResult> getExclusionFilter() { - return exclusionFilter; - } - - public void setExclusionFilter(ObjectFilter<CaptureSearchResult> exclusionFilter) { - this.exclusionFilter = exclusionFilter; - } - public Set<String> keySet() { return filters.keySet(); } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixReplayDispatcher.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -59,7 +59,7 @@ // if the result is not for the exact date requested, redirect to the // exact date. some capture dates are not 14 digits, only compare as // many digits as are in the result date: - String reqDateStr = wbRequest.get(WaybackRequest.REQUEST_EXACT_DATE); + String reqDateStr = wbRequest.getReplayTimestamp(); String resDateStr = result.getCaptureTimestamp(); if((resDateStr.length() > reqDateStr.length()) || !resDateStr.equals(reqDateStr.substring(0, resDateStr.length()))) { Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/domainprefix/DomainPrefixRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -29,7 +29,6 @@ import javax.servlet.http.HttpServletRequest; -import org.apache.commons.httpclient.URIException; import org.archive.wayback.core.Timestamp; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; @@ -89,21 +88,19 @@ String requestUrl = getRequestString(host,httpRequest); - wbRequest.put(WaybackRequest.REQUEST_EXACT_DATE, dateStr); - 
wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_REPLAY_QUERY); - try { - wbRequest.setRequestUrl(requestUrl); - } catch (URIException e) { - e.printStackTrace(); - wbRequest = null; - } + wbRequest.setReplayRequest(); + wbRequest.setReplayTimestamp(dateStr); + wbRequest.setRequestUrl(requestUrl); + } else { Matcher queryMatcher = QUERY_REGEX.matcher(prefix); if(queryMatcher != null && queryMatcher.matches()) { wbRequest = new WaybackRequest(); String dateStr = queryMatcher.group(1); String host = queryMatcher.group(2); + + String requestUrl = getRequestString(host,httpRequest); + String startDate; String endDate; if(dateStr.length() == 0) { @@ -113,20 +110,13 @@ startDate = Timestamp.parseBefore(dateStr).getDateStr(); endDate = Timestamp.parseAfter(dateStr).getDateStr(); } - wbRequest.put(WaybackRequest.REQUEST_START_DATE,startDate); - wbRequest.put(WaybackRequest.REQUEST_END_DATE,endDate); - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_URL_QUERY); - - String requestUrl = getRequestString(host,httpRequest); - - try { - wbRequest.setRequestUrl(requestUrl); - } catch (URIException e) { - e.printStackTrace(); - wbRequest = null; - } + wbRequest.setCaptureQueryRequest(); + wbRequest.setStartTimestamp(startDate); + wbRequest.setEndTimestamp(endDate); + wbRequest.setRequestUrl(requestUrl); } + // TODO: what if it doesn't match the QUERY_REGEX? + // throw a BadQueryException? 
} } } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/exception/BaseExceptionRenderer.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -68,13 +68,13 @@ if (wbRequest == null) { return false; } - String referer = wbRequest.get(WaybackRequest.REQUEST_REFERER_URL); + String referer = wbRequest.getRefererUrl(); return (referer != null && referer.length() > 0); } protected boolean requestIsImage(HttpServletRequest httpRequest, WaybackRequest wbRequest) { - String requestUrl = wbRequest.get(WaybackRequest.REQUEST_URL); + String requestUrl = wbRequest.getRequestUrl(); if (requestUrl == null) return false; Matcher matcher = IMAGE_REGEX.matcher(requestUrl); @@ -84,14 +84,14 @@ protected boolean requestIsJavascript(HttpServletRequest httpRequest, WaybackRequest wbRequest) { - String requestUrl = wbRequest.get(WaybackRequest.REQUEST_URL); + String requestUrl = wbRequest.getRequestUrl(); return (requestUrl != null) && requestUrl.endsWith(".js"); } protected boolean requestIsCSS(HttpServletRequest httpRequest, WaybackRequest wbRequest) { - String requestUrl = wbRequest.get(WaybackRequest.REQUEST_URL); + String requestUrl = wbRequest.getRequestUrl(); return (requestUrl != null) && requestUrl.endsWith(".css"); } @@ -101,9 +101,9 @@ // the "standard HTML" response handler: String jspPath = errorJsp; - if(wbRequest.isQueryRequest()) { + if(!wbRequest.isReplayRequest()) { - if(wbRequest.containsKey(WaybackRequest.REQUEST_XML_DATA)) { + if(wbRequest.isXMLMode()) { jspPath = xmlErrorJsp; } Modified: 
trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/liveweb/LiveWebCache.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -80,10 +80,8 @@ boolean bUseOlder) throws URIException { WaybackRequest req = new WaybackRequest(); req.setRequestUrl(url.toString()); - req.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_CLOSEST_QUERY); - req.put(WaybackRequest.REQUEST_EXACT_DATE, - Timestamp.currentTimestamp().getDateStr()); + req.setReplayRequest(); + req.setReplayTimestamp(Timestamp.currentTimestamp().getDateStr()); Timestamp earliest = null; if(bUseOlder) { earliest = Timestamp.earliestTimestamp(); @@ -91,11 +89,10 @@ Date d = new Date(System.currentTimeMillis() - maxCacheMS); earliest = new Timestamp(d); } - req.put(WaybackRequest.REQUEST_START_DATE,earliest.getDateStr()); + req.setStartTimestamp(earliest.getDateStr()); // for now, assume all live web requests are only satisfiable by the // exact host -- no massaging. 
- req.put(WaybackRequest.REQUEST_EXACT_HOST_ONLY, - WaybackRequest.REQUEST_YES); + req.setExactHost(true); return req; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyReplayRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -28,7 +28,6 @@ import javax.servlet.http.HttpServletRequest; -import org.apache.commons.httpclient.URIException; import org.archive.util.InetAddressUtil; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadQueryException; @@ -85,14 +84,8 @@ String requestUrl = requestScheme + "://" + requestServer + requestPath; wbRequest = new WaybackRequest(); - try { - wbRequest.setRequestUrl(requestUrl); - } catch (URIException e) { - e.printStackTrace(); - return null; - } - wbRequest.put(WaybackRequest.REQUEST_TYPE, - WaybackRequest.REQUEST_REPLAY_QUERY); + wbRequest.setRequestUrl(requestUrl); + wbRequest.setReplayRequest(); wbRequest.setResultsPerPage(maxRecords); return wbRequest; } Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/proxy/ProxyRequestParser.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -70,8 +70,10 @@ String id = httpRequest.getHeader("Proxy-Id"); if (id == null) id = httpRequest.getRemoteAddr(); - 
wbRequest.put(WaybackRequest.REQUEST_EXACT_DATE, Timestamp - .getTimestampForId(httpRequest.getContextPath(), id)); + // TODO: This is hacky. + String replayDateStr = Timestamp.getTimestampForId( + httpRequest.getContextPath(), id); + wbRequest.setReplayTimestamp(replayDateStr); wbRequest.fixup(httpRequest); } return wbRequest; Modified: trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java =================================================================== --- trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-07-07 22:07:22 UTC (rev 2418) +++ trunk/archive-access/projects/wayback/wayback-core/src/main/java/org/archive/wayback/query/Renderer.java 2008-07-08 06:06:41 UTC (rev 2419) @@ -72,10 +72,10 @@ CaptureSearchResults results, ResultURIConverter uriConverter) throws ServletException, IOException { - UICaptureQueryResults uiResults = new UICaptureQueryResults(httpRequest, wbRequest, - results, uriConverter); + UICaptureQueryResults uiResults = new UICaptureQueryResults(httpRequest, + wbRequest, results, uriConverter); String jsp = captureJsp; - if(wbRequest.containsKey(WaybackRequest.REQUEST_XML_DATA)) { + if(wbRequest.isXMLMode()) { jsp = xmlCaptureJsp; } @@ -95,7 +95,7 @@ UIUrlQueryResults uiResults = new UIUrlQueryResults(httpRequest, wbRequest, results, uriConverter); String jsp = urlJsp; - if(wbRequest.containsKey(WaybackRequest.REQUEST_XML_DATA)) { + if(wbRequest.is... [truncated message content] |