Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework In directory sc8-pr-cvs1:/tmp/cvs-serv14551/src/org/archive/crawler/framework Modified Files: CrawlController.java Filter.java URIStore.java Processor.java Log Message: SeedExtensionFiltering and related changes Index: CrawlController.java =================================================================== RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/CrawlController.java,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** CrawlController.java 6 Aug 2003 01:18:43 -0000 1.26 --- CrawlController.java 19 Sep 2003 01:37:20 -0000 1.27 *************** *** 36,41 **** public class CrawlController implements CrawlerConfigurationConstants { - - private File disk; public Logger uriProcessing = Logger.getLogger("uri-processing"); --- 36,39 ---- *************** *** 52,56 **** URIStore store; URISelector selector; ! Processor firstProcessor; LinkedHashMap processors = new LinkedHashMap(); --- 50,54 ---- URIStore store; URISelector selector; ! Processor firstProcessor; LinkedHashMap processors = new LinkedHashMap(); Index: Filter.java =================================================================== RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/Filter.java,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** Filter.java 2 Jul 2003 01:59:42 -0000 1.7 --- Filter.java 19 Sep 2003 01:37:20 -0000 1.8 *************** *** 32,36 **** protected abstract boolean innerAccepts(Object o); ! public void initialize() { setName(getStringAt("@name")); if("not".equals(getStringAt("@modifier"))) { --- 32,36 ---- protected abstract boolean innerAccepts(Object o); ! public void initialize(CrawlController controller) { setName(getStringAt("@name")); if("not".equals(getStringAt("@modifier"))) { Index: URIStore.java =================================================================== RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/URIStore.java,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** URIStore.java 17 Jul 2003 22:21:06 -0000 1.4 --- URIStore.java 19 Sep 2003 01:37:20 -0000 1.5 *************** *** 7,10 **** --- 7,12 ---- package org.archive.crawler.framework; + import java.util.Collection; + /** * Handles all persistence for Scheduler and Selector, allowing *************** *** 25,27 **** --- 27,34 ---- public int discoveredUriCount(); + + /** + * + */ + Collection getSeeds(); } Index: Processor.java =================================================================== RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/Processor.java,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** Processor.java 30 Jul 2003 01:30:26 -0000 1.12 --- Processor.java 19 Sep 2003 01:37:20 -0000 1.13 *************** *** 94,98 **** Object o = iter.next(); Filter f = (Filter)o; ! f.initialize(); } } --- 94,98 ---- Object o = iter.next(); Filter f = (Filter)o; ! f.initialize(controller); } } |