Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv3474
Modified Files:
Tag: mapred
IndexArcs.java
Log Message:
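Add some command line options: <crawlDir> is now a required second argument (replacing the optional -dir flag), and the new -noimport, -noinvert and -noindex flags skip the import, link-inversion and indexing steps respectively. The check that refused to run when the crawl directory already exists has been removed.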
Index: IndexArcs.java
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/Attic/IndexArcs.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -C2 -d -r1.1.2.1 -r1.1.2.2
*** IndexArcs.java 1 Sep 2005 18:45:29 -0000 1.1.2.1
--- IndexArcs.java 12 Oct 2005 16:49:04 -0000 1.1.2.2
***************
*** 40,45 ****
/* Import and index a set of arc files. */
public static void main(String args[]) throws Exception {
! if (args.length < 1) {
! System.out.println("Usage: IndexArcs <arcsDir> [-dir d]");
return;
}
--- 40,45 ----
/* Import and index a set of arc files. */
public static void main(String args[]) throws Exception {
! if (args.length < 2) {
! System.out.println("Usage: IndexArcs <arcsDir> <crawlDir> [-noimport] [-noinvert] [-noindex]");
return;
}
***************
*** 47,85 ****
JobConf conf = new JobConf(NutchConf.get());
! File arcsDir = null;
! File dir = new File("crawl-" + getDate());
! for (int i = 0; i < args.length; i++) {
! if ("-dir".equals(args[i])) {
! dir = new File(args[i+1]);
! i++;
! } else if (args[i] != null) {
! arcsDir = new File(args[i]);
}
}
NutchFileSystem fs = NutchFileSystem.get(conf);
- if (fs.exists(dir)) {
- throw new RuntimeException(dir + " already exists.");
- }
! LOG.info("IndexArcs started in: " + dir);
LOG.info("arcsDir = " + arcsDir);
! File linkDb = new File(dir + "/linkdb");
! File index = new File(dir + "/indexes");
! File segments = new File(dir + "/segments");
! File segment = new File(segments, getDate());
! // import arcs
! new ImportArcs(conf).importArcs(arcsDir, segment);
! // invert links
! new LinkDb(conf).invert(linkDb, segments);
! // index everything
! new Indexer(conf).index(index, linkDb, fs.listFiles(segments));
! LOG.info("IndexArcs finished: " + dir);
}
}
--- 47,93 ----
JobConf conf = new JobConf(NutchConf.get());
! File arcsDir = new File(args[0]);
! File crawlDir = new File(args[1]);
! boolean noImport = false;
! boolean noInvert = false;
! boolean noIndex = false;
!
! for (int i = 2; i < args.length; i++) {
! if ("-noimport".equals(args[i])) {
! noImport = true;
! } else if ("-noinvert".equals(args[i])) {
! noInvert = true;
! } else if ("-noindex".equals(args[i])) {
! noIndex = true;
}
}
NutchFileSystem fs = NutchFileSystem.get(conf);
! LOG.info("IndexArcs started in: " + crawlDir);
LOG.info("arcsDir = " + arcsDir);
! File linkDb = new File(crawlDir + "/linkdb");
! File segments = new File(crawlDir + "/segments");
! if (!noImport) { // import arcs
! File segment = new File(segments, getDate());
! LOG.info("importing arcs in " + arcsDir + " to " + segment);
! new ImportArcs(conf).importArcs(arcsDir, segment);
! }
! if (!noInvert) { // invert links
! LOG.info("inverting links in " + segments);
! new LinkDb(conf).invert(linkDb, segments);
! }
! if (!noIndex) { // index
! File index = new File(crawlDir + "/indexes");
! LOG.info("indexing " + crawlDir);
! new Indexer(conf).index(index, linkDb, fs.listFiles(segments));
! }
! LOG.info("IndexArcs finished: " + crawlDir);
}
}
|