Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32146/src/java/org/archive/access/nutch
Modified Files:
Tag: mapred
IndexArcs.java
Log Message:
Fix to get the correct segment name: create the segment path inside the import step and, when updating the crawldb, pick the newest entry from the segments directory instead of reusing a path recomputed from getDate().
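A minimal, self-contained sketch of the selection logic this commit switches to, assuming date-stamped segment directory names (so lexicographic order matches chronological order); the paths and plain java.io.File usage here are illustrative only, while IndexArcs itself goes through the Nutch filesystem (fs.listFiles):

import java.io.File;
import java.util.Arrays;

public class NewestSegment {
    public static void main(String[] args) {
        // Hypothetical crawl layout: crawl/segments/<yyyyMMddHHmmss>/...
        File segments = new File(args.length > 0 ? args[0] : "crawl/segments");
        File[] segmentDirs = segments.listFiles();
        Arrays.sort(segmentDirs); // date-stamped names sort chronologically
        File newest = segmentDirs[segmentDirs.length - 1];
        System.out.println("newest segment: " + newest);
    }
}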
Index: IndexArcs.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/Attic/IndexArcs.java,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** IndexArcs.java 20 Oct 2005 23:30:49 -0000 1.1.2.3
--- IndexArcs.java 22 Nov 2005 23:11:33 -0000 1.1.2.4
***************
*** 76,80 ****
File linkDb = new File(crawlDir + "/linkdb");
File segments = new File(crawlDir + "/segments");
- File segment = new File(segments, getDate());
File indexes = new File(crawlDir + "/indexes");
File index = new File(crawlDir + "/index");
--- 76,79 ----
***************
*** 83,86 ****
--- 82,86 ----
if (!noImport) { // import arcs
+ File segment = new File(segments, getDate());
LOG.info("importing arcs in " + arcsDir + " to " + segment);
new ImportArcs(conf).importArcs(arcsDir, segment);
***************
*** 89,93 ****
if (!noUpdate) { // update crawldb
LOG.info("updating crawldb in " + crawlDb);
! new CrawlDb(conf).update(crawlDb, segment);
}
--- 89,94 ----
if (!noUpdate) { // update crawldb
LOG.info("updating crawldb in " + crawlDb);
! File[] segmentFiles = fs.listFiles(segments);
! new CrawlDb(conf).update(crawlDb, segmentFiles[segmentFiles.length-1]);
}
***************
*** 100,107 ****
LOG.info("indexing " + crawlDir);
new Indexer(conf).index(indexes,crawlDb,linkDb,fs.listFiles(segments));
}
- new DeleteDuplicates(conf).dedup(new File[] { indexes });
- new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir).merge();
LOG.info("IndexArcs finished: " + crawlDir);
--- 101,108 ----
LOG.info("indexing " + crawlDir);
new Indexer(conf).index(indexes,crawlDb,linkDb,fs.listFiles(segments));
+ new DeleteDuplicates(conf).dedup(new File[] { indexes });
+ new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir).merge();
}
LOG.info("IndexArcs finished: " + crawlDir);