From: <go...@us...> - 2003-09-12 02:02:29
|
Update of /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel
In directory sc8-pr-cvs1:/tmp/cvs-serv28250/src/org/archive/crawler/datamodel

Modified Files:
    CrawlURI.java
Log Message:
improved link-hop, embed-hop handling: embeds get a separate leash distance from last link

Index: CrawlURI.java
===================================================================
RCS file: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/org/archive/crawler/datamodel/CrawlURI.java,v
retrieving revision 1.37
retrieving revision 1.38
diff -C2 -d -r1.37 -r1.38
*** CrawlURI.java 6 Sep 2003 02:00:13 -0000 1.37
--- CrawlURI.java 12 Sep 2003 02:02:24 -0000 1.38
***************
*** 62,65 ****
--- 62,68 ----
      private int chaffness = 0; // suspiciousness of being of chaff

+     private int linkHopCount = -1; // from seeds
+     private int embedHopCount = -1; // from a sure link
+
      private int threadNumber;

***************
*** 461,470 ****
       * @param sourceCuri
       */
!     public void setVia(CrawlURI sourceCuri) {
          via = sourceCuri;
      }

      /* public boolean isFubared(){
          return ( fetchStatus < 0 && numberOfFetchAttempts >= 3);
      }*/
  }
--- 464,518 ----
       * @param sourceCuri
       */
!     public void setViaLinkFrom(CrawlURI sourceCuri) {
          via = sourceCuri;
+         int candidateLinkHopCount = sourceCuri.getLinkHopCount()+1;
+         embedHopCount = 0;
+         if (linkHopCount == -1) {
+             linkHopCount = candidateLinkHopCount;
+             return;
+         }
+         if (linkHopCount > candidateLinkHopCount) {
+             linkHopCount = candidateLinkHopCount;
+         }
+     }
+
+     /**
+      * @param sourceCuri
+      */
+     public void setViaEmbedFrom(CrawlURI sourceCuri) {
+         via = sourceCuri;
+         int candidateLinkHopCount = sourceCuri.getLinkHopCount();
+         if (linkHopCount == -1) {
+             linkHopCount = candidateLinkHopCount;
+         } else if (linkHopCount > candidateLinkHopCount) {
+             linkHopCount = candidateLinkHopCount;
+         }
+         int candidateEmbedHopCount = sourceCuri.getEmbedHopCount()+1;
+         if (embedHopCount == -1) {
+             embedHopCount = candidateEmbedHopCount;
+         } else if (embedHopCount > candidateEmbedHopCount) {
+             embedHopCount = candidateEmbedHopCount;
+         }
      }
+

      /* public boolean isFubared(){
          return ( fetchStatus < 0 && numberOfFetchAttempts >= 3);
      }*/
+
+
+     /**
+      * @return
+      */
+     public int getEmbedHopCount() {
+         return embedHopCount;
+     }
+
+     /**
+      * @return
+      */
+     public int getLinkHopCount() {
+         return linkHopCount;
+     }
+
  }
|