From: <bi...@us...> - 2009-06-23 21:13:31
|
Revision: 2740
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2740&view=rev
Author:   binzino
Date:     2009-06-23 21:13:28 +0000 (Tue, 23 Jun 2009)

Log Message:
-----------
Fix WAX-46. Added command-line option to only dump a single field. Also added option to only output the # of records in the index.

Modified Paths:
--------------
    tags/nutchwax-0_12_5/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java

Modified: tags/nutchwax-0_12_5/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java
===================================================================
--- tags/nutchwax-0_12_5/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java 2009-06-22 21:29:05 UTC (rev 2739)
+++ tags/nutchwax-0_12_5/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java 2009-06-23 21:13:28 UTC (rev 2740)
@@ -23,6 +23,7 @@
 import java.io.File;
 import java.util.Iterator;
 import java.util.Arrays;
+import java.util.Collection;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.ArchiveParallelReader;
@@ -37,10 +38,19 @@
     }

     int offset = 0;
-    if ( args[0].equals( "-f" ) )
+    if ( args[0].equals( "-l" ) || args[0].equals( "-c" ) )
     {
       offset = 1;
     }
+    if ( args[0].equals( "-f" ) )
+    {
+      if ( args.length < 2 )
+      {
+        System.out.println( "Error: missing argument to -f\n" );
+        usageAndExit( );
+      }
+      offset = 2;
+    }

     String dirs[] = new String[args.length - offset];
     System.arraycopy( args, offset, dirs, 0, args.length - offset );
@@ -51,23 +61,51 @@
       reader.add( IndexReader.open( dir ) );
     }

-    if ( offset > 0 )
+    if ( args[0].equals( "-l" ) )
     {
       listFields( reader );
     }
+    else if ( args[0].equals( "-c" ) )
+    {
+      countDocs( reader );
+    }
+    else if ( args[0].equals( "-f" ) )
+    {
+      dumpIndex( reader, args[1] );
+    }
     else
     {
       dumpIndex( reader );
     }
   }

+  private static void dumpIndex( IndexReader reader, String fieldName ) throws Exception
+  {
+    Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
+
+    if ( ! fieldNames.contains( fieldName ) )
+    {
+      System.out.println( "Field not in index: " + fieldName );
+      System.exit( 2 );
+    }
+
+    int numDocs = reader.numDocs();
+
+    for (int i = 0; i < numDocs; i++)
+    {
+      System.out.println( Arrays.toString( reader.document(i).getValues( (String) fieldName ) ) );
+    }
+
+  }
+
   private static void dumpIndex( IndexReader reader ) throws Exception
   {
-    Object[] fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL).toArray();
+    Object[] fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL).toArray( );
+    Arrays.sort( fieldNames );

-    for (int i = 0; i < fieldNames.length; i++)
+    for ( int i = 0; i < fieldNames.length; i++ )
     {
-      System.out.print(fieldNames[i] + "\t");
+      System.out.print( fieldNames[i] + "\t" );
     }
     System.out.println();

@@ -87,19 +125,27 @@

   private static void listFields( IndexReader reader ) throws Exception
   {
-    Iterator it = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator();
-
-    while (it.hasNext())
+    Object[] fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL).toArray( );
+    Arrays.sort( fieldNames );
+
+    for ( int i = 0; i < fieldNames.length; i++ )
     {
-      System.out.println(it.next());
+      System.out.println( fieldNames[i] );
     }
-
-    reader.close();
   }

+  private static void countDocs( IndexReader reader ) throws Exception
+  {
+    System.out.println( reader.numDocs( ) );
+  }
+
   private static void usageAndExit()
   {
-    System.out.println("Usage: DumpParallelIndex [-f] index1 ... indexN");
+    System.out.println( "Usage: DumpParallelIndex [option] index1 ... indexN" );
+    System.out.println( "Options:" );
+    System.out.println( " -c Emit document count" );
+    System.out.println( " -f <fieldname> Only dump specified field" );
+    System.out.println( " -l List fields in index" );
     System.exit(1);
   }
 }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |