From: Michael S. <sta...@us...> - 2006-11-03 03:34:40
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch In directory sc8-pr-cvs11.sourceforge.net:/tmp/cvs-serv796/src/java/org/archive/access/nutch Modified Files: ImportArcs.java Log Message: * src/java/org/archive/access/nutch/ImportArcs.java When deriving the collection name from the ARC prefix, use the ARC name from the first record rather than the actual file name (when rsyncing here at IA, ARCs get an md5 prefix). Index: ImportArcs.java =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/src/java/org/archive/access/nutch/ImportArcs.java,v retrieving revision 1.61 retrieving revision 1.62 diff -C2 -d -r1.61 -r1.62 *** ImportArcs.java 2 Nov 2006 07:55:33 -0000 1.61 --- ImportArcs.java 3 Nov 2006 03:34:36 -0000 1.62 *************** *** 295,311 **** String arcName = null; try { ! // If empty collection name, take arc prefix. ImportArcs.this.collectionName = getConf(). get(ImportArcs.WAX_SUFFIX + ImportArcs.ARCCOLLECTION_KEY); - if ((ImportArcs.this.collectionName == null) || - (ImportArcs.this.collectionName.length() <= 0)) { - ImportArcs.this.collectionName = - getCollectionFromArcname(this.arcLocation); - } - if ((ImportArcs.this.collectionName == null) || - (ImportArcs.this.collectionName.length() == 0)) { - throw new NullPointerException("Collection name can't " - + "be empty"); - } final ParseUtil pu = new ParseUtil(getConf()); --- 295,301 ---- String arcName = null; try { ! // If empty collection name, take arc prefix later below. ImportArcs.this.collectionName = getConf(). 
get(ImportArcs.WAX_SUFFIX + ImportArcs.ARCCOLLECTION_KEY); final ParseUtil pu = new ParseUtil(getConf()); *************** *** 317,320 **** --- 307,320 ---- if (arcName == null) { arcName = trimARCName(rec.getMetaData().getUrl()); + if ((ImportArcs.this.collectionName == null) || + (ImportArcs.this.collectionName.length() <= 0)) { + ImportArcs.this.collectionName = + getCollectionFromArcname(arcName); + } + if ((ImportArcs.this.collectionName == null) || + (ImportArcs.this.collectionName.length() == 0)) { + throw new NullPointerException("Collection name can't " + + "be empty"); + } } if (!isIndex(rec)) { *************** *** 546,551 **** } - - protected static String getCollectionFromArcname(final String arcurl) throws URISyntaxException { --- 546,549 ---- |