|
From: <go...@us...> - 2003-09-06 01:44:08
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic
In directory sc8-pr-cvs1:/tmp/cvs-serv15073/src/org/archive/crawler/basic
Modified Files:
SimplePreconditionEnforcer.java
Log Message:
chaff threshold enforced
Index: SimplePreconditionEnforcer.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic/SimplePreconditionEnforcer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** SimplePreconditionEnforcer.java 6 Aug 2003 01:21:21 -0000 1.8
--- SimplePreconditionEnforcer.java 6 Sep 2003 01:44:05 -0000 1.9
***************
*** 25,30 ****
--- 25,32 ----
private static String XP_DELAY_FACTOR = "//params/@delay-factor";
private static String XP_MINIMUM_DELAY = "//params/@minimum-delay";
+ private static String XP_CHAFF_THRESHOLD = "//params/@chaff-threshold";
private static int DEFAULT_DELAY_FACTOR = 10;
private static int DEFAULT_MINIMUM_DELAY = 2000;
+ private static int DEFAULT_CHAFF_THRESHOLD = 3;
private static Logger logger = Logger.getLogger("org.archive.crawler.basic.SimplePolitenessEnforcer");
***************
*** 35,38 ****
--- 37,44 ----
protected void innerProcess(CrawlURI curi) {
+ if (considerChaff(curi)) {
+ return;
+ }
+
if (considerDnsPreconditions(curi)) {
return;
***************
*** 57,60 ****
--- 63,81 ----
return;
+ }
+
+ /**
+ * @param curi
+ * @return
+ */
+ private boolean considerChaff(CrawlURI curi) {
+ //if (curi.getChaffness()>1) {
+ // System.out.println(curi.getChaffness()+" "+curi.getUURI().toString());
+ //}
if(curi.getChaffness()>getIntAt(XP_CHAFF_THRESHOLD,DEFAULT_CHAFF_THRESHOLD)) {
+ curi.setFetchStatus(S_DEEMED_CHAFF);
+ curi.cancelFurtherProcessing();
+ return true;
+ }
+ return false;
}
|