From: <go...@us...> - 2003-09-06 01:44:08
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic
In directory sc8-pr-cvs1:/tmp/cvs-serv15073/src/org/archive/crawler/basic

Modified Files:
    SimplePreconditionEnforcer.java
Log Message:
chaff threshold enforced

Index: SimplePreconditionEnforcer.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/basic/SimplePreconditionEnforcer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** SimplePreconditionEnforcer.java 6 Aug 2003 01:21:21 -0000 1.8
--- SimplePreconditionEnforcer.java 6 Sep 2003 01:44:05 -0000 1.9
***************
*** 25,30 ****
--- 25,32 ----
      private static String XP_DELAY_FACTOR = "//params/@delay-factor";
      private static String XP_MINIMUM_DELAY = "//params/@minimum-delay";
+     private static String XP_CHAFF_THRESHOLD = "//params/@chaff-threshold";
      private static int DEFAULT_DELAY_FACTOR = 10;
      private static int DEFAULT_MINIMUM_DELAY = 2000;
+     private static int DEFAULT_CHAFF_THRESHOLD = 3;

      private static Logger logger = Logger.getLogger("org.archive.crawler.basic.SimplePolitenessEnforcer");
***************
*** 35,38 ****
--- 37,44 ----

      protected void innerProcess(CrawlURI curi) {
+         if (considerChaff(curi)) {
+             return;
+         }
+
          if (considerDnsPreconditions(curi)) {
              return;
***************
*** 57,60 ****
--- 63,81 ----

          return;
+     }
+     /**
+      * @param curi
+      * @return
+      */
+     private boolean considerChaff(CrawlURI curi) {
+         //if (curi.getChaffness()>1) {
+         // System.out.println(curi.getChaffness()+" "+curi.getUURI().toString());
+         //}
+         if(curi.getChaffness()>getIntAt(XP_CHAFF_THRESHOLD,DEFAULT_CHAFF_THRESHOLD)) {
+             curi.setFetchStatus(S_DEEMED_CHAFF);
+             curi.cancelFurtherProcessing();
+             return true;
+         }
+         return false;
      }

|