Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel
In directory sc8-pr-cvs1:/tmp/cvs-serv27184/src/org/archive/crawler/datamodel
Modified Files:
CrawlURI.java
Log Message:
ensure sensible link, embed counts
Index: CrawlURI.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel/CrawlURI.java,v
retrieving revision 1.39
retrieving revision 1.40
diff -C2 -d -r1.39 -r1.40
*** CrawlURI.java 23 Sep 2003 01:16:35 -0000 1.39
--- CrawlURI.java 24 Sep 2003 01:45:26 -0000 1.40
***************
*** 40,44 ****
private long wakeTime; // if "snoozed", when this CrawlURI may awake
private long dontRetryBefore = -1;
- private int threadNumber;
// Processing progress
--- 40,43 ----
***************
*** 47,50 ****
--- 46,50 ----
private int deferrals = 0; // count of postponements for prerequisites
private int fetchAttempts = 0; // the number of fetch attempts that have been made
+ private int threadNumber;
// flexible dynamic attributes
***************
*** 54,58 ****
private CrawlURI via; // curi that led to this (lowest hops from seed)
private int linkHopCount = -1; // from seeds
! private int embedHopCount = -1; // from a sure link
////////////////////////////////////////////////////////////////////
--- 54,58 ----
private CrawlURI via; // curi that led to this (lowest hops from seed)
private int linkHopCount = -1; // from seeds
! private int embedHopCount = -1; // from a sure link; reset upon any link traversal
////////////////////////////////////////////////////////////////////
***************
*** 396,401 ****
public void setViaLinkFrom(CrawlURI sourceCuri) {
via = sourceCuri;
int candidateLinkHopCount = sourceCuri.getLinkHopCount()+1;
- embedHopCount = 0;
if (linkHopCount == -1) {
linkHopCount = candidateLinkHopCount;
--- 396,402 ----
public void setViaLinkFrom(CrawlURI sourceCuri) {
via = sourceCuri;
+ // reset embedHopCount -- but only back to 1 if it was >0, so special embed handling still applies
+ embedHopCount = (embedHopCount > 0) ? 1 : 0;
int candidateLinkHopCount = sourceCuri.getLinkHopCount()+1;
if (linkHopCount == -1) {
linkHopCount = candidateLinkHopCount;
***************
*** 444,447 ****
--- 445,456 ----
public int getLinkHopCount() {
return linkHopCount;
+ }
+
+ /**
+ * Flags this CrawlURI as a seed by setting both linkHopCount and embedHopCount to 0.
+ */
+ public void markAsSeed() {
+ linkHopCount = 0;
+ embedHopCount = 0;
}
|