From: Doug C. <cu...@us...> - 2005-11-22 23:11:40
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32146/src/java/org/archive/access/nutch

Modified Files:
      Tag: mapred
    IndexArcs.java
Log Message:
Fix to get correct segment name.

Index: IndexArcs.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/Attic/IndexArcs.java,v
retrieving revision 1.1.2.3
retrieving revision 1.1.2.4
diff -C2 -d -r1.1.2.3 -r1.1.2.4
*** IndexArcs.java 20 Oct 2005 23:30:49 -0000 1.1.2.3
--- IndexArcs.java 22 Nov 2005 23:11:33 -0000 1.1.2.4
***************
*** 76,80 ****
      File linkDb = new File(crawlDir + "/linkdb");
      File segments = new File(crawlDir + "/segments");
-     File segment = new File(segments, getDate());
      File indexes = new File(crawlDir + "/indexes");
      File index = new File(crawlDir + "/index");
--- 76,79 ----
***************
*** 83,86 ****
--- 82,86 ----
      if (!noImport) { // import arcs
+       File segment = new File(segments, getDate());
        LOG.info("importing arcs in " + arcsDir + " to " + segment);
        new ImportArcs(conf).importArcs(arcsDir, segment);
***************
*** 89,93 ****
      if (!noUpdate) { // update crawldb
        LOG.info("updating crawldb in " + crawlDb);
!       new CrawlDb(conf).update(crawlDb, segment);
      }
--- 89,94 ----
      if (!noUpdate) { // update crawldb
        LOG.info("updating crawldb in " + crawlDb);
!       File[] segmentFiles = fs.listFiles(segments);
!       new CrawlDb(conf).update(crawlDb, segmentFiles[segmentFiles.length-1]);
      }
***************
*** 100,107 ****
        LOG.info("indexing " + crawlDir);
        new Indexer(conf).index(indexes,crawlDb,linkDb,fs.listFiles(segments));
      }
-     new DeleteDuplicates(conf).dedup(new File[] { indexes });
-     new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir).merge();
      LOG.info("IndexArcs finished: " + crawlDir);
--- 101,108 ----
        LOG.info("indexing " + crawlDir);
        new Indexer(conf).index(indexes,crawlDb,linkDb,fs.listFiles(segments));
+       new DeleteDuplicates(conf).dedup(new File[] { indexes });
+       new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir).merge();
      }
      LOG.info("IndexArcs finished: " + crawlDir);
|