Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework
In directory sc8-pr-cvs1:/tmp/cvs-serv14551/src/org/archive/crawler/framework
Modified Files:
CrawlController.java Filter.java URIStore.java Processor.java
Log Message:
SeedExtensionFiltering and related changes
Index: CrawlController.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/CrawlController.java,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** CrawlController.java 6 Aug 2003 01:18:43 -0000 1.26
--- CrawlController.java 19 Sep 2003 01:37:20 -0000 1.27
***************
*** 36,41 ****
public class CrawlController implements CrawlerConfigurationConstants {
-
-
private File disk;
public Logger uriProcessing = Logger.getLogger("uri-processing");
--- 36,39 ----
***************
*** 52,56 ****
URIStore store;
URISelector selector;
!
Processor firstProcessor;
LinkedHashMap processors = new LinkedHashMap();
--- 50,54 ----
URIStore store;
URISelector selector;
!
Processor firstProcessor;
LinkedHashMap processors = new LinkedHashMap();
Index: Filter.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/Filter.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** Filter.java 2 Jul 2003 01:59:42 -0000 1.7
--- Filter.java 19 Sep 2003 01:37:20 -0000 1.8
***************
*** 32,36 ****
protected abstract boolean innerAccepts(Object o);
! public void initialize() {
setName(getStringAt("@name"));
if("not".equals(getStringAt("@modifier"))) {
--- 32,36 ----
protected abstract boolean innerAccepts(Object o);
! public void initialize(CrawlController controller) {
setName(getStringAt("@name"));
if("not".equals(getStringAt("@modifier"))) {
Index: URIStore.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/URIStore.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** URIStore.java 17 Jul 2003 22:21:06 -0000 1.4
--- URIStore.java 19 Sep 2003 01:37:20 -0000 1.5
***************
*** 7,10 ****
--- 7,12 ----
package org.archive.crawler.framework;
+ import java.util.Collection;
+
/**
* Handles all persistence for Scheduler and Selector, allowing
***************
*** 25,27 ****
--- 27,34 ----
public int discoveredUriCount();
+
+ /**
+ *
+ */
+ Collection getSeeds();
}
Index: Processor.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/framework/Processor.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** Processor.java 30 Jul 2003 01:30:26 -0000 1.12
--- Processor.java 19 Sep 2003 01:37:20 -0000 1.13
***************
*** 94,98 ****
Object o = iter.next();
Filter f = (Filter)o;
! f.initialize();
}
}
--- 94,98 ----
Object o = iter.next();
Filter f = (Filter)o;
! f.initialize(controller);
}
}
|