From: <bi...@us...> - 2008-06-26 22:36:11
|
Revision: 2332 http://archive-access.svn.sourceforge.net/archive-access/?rev=2332&view=rev Author: binzino Date: 2008-06-26 15:36:20 -0700 (Thu, 26 Jun 2008) Log Message: ----------- Initial revision. Added Paths: ----------- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java Added: trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java (rev 0) +++ trunk/archive-access/projects/nutchwax/archive/src/java/org/archive/nutchwax/tools/DumpParallelIndex.java 2008-06-26 22:36:20 UTC (rev 2332) @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2008 Internet Archive. + * + * This file is part of the archive-access tools project + * (http://sourceforge.net/projects/archive-access). + * + * The archive-access tools are free software; you can redistribute them and/or + * modify them under the terms of the GNU Lesser Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or any + * later version. + * + * The archive-access tools are distributed in the hope that they will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + * Public License for more details. + * + * You should have received a copy of the GNU Lesser Public License along with + * the archive-access tools; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +package org.archive.nutchwax.tools; + +import java.io.File; +import java.util.Iterator; +import java.util.Arrays; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.ArchiveParallelReader; + +public class DumpParallelIndex +{ + public static void main( String[] args ) throws Exception + { + String option = ""; + String indexDir = ""; + + if ( args.length < 1 ) + { + usageAndExit( ); + } + + int offset = 0; + if ( args[0].equals( "-f" ) ) + { + offset = 1; + } + + String dirs[] = new String[args.length - offset]; + System.arraycopy( args, offset, dirs, 0, args.length - offset ); + + ArchiveParallelReader reader = new ArchiveParallelReader( ); + for ( String dir : dirs ) + { + reader.add( IndexReader.open( dir ) ); + } + + if ( offset > 0 ) + { + listFields( reader ); + } + else + { + dumpIndex( reader ); + } + } + + private static void dumpIndex( IndexReader reader ) throws Exception + { + Object[] fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL).toArray(); + + for (int i = 0; i < fieldNames.length; i++) + { + System.out.print(fieldNames[i] + "\t"); + } + + System.out.println(); + + int numDocs = reader.numDocs(); + + for (int i = 0; i < numDocs; i++) + { + for (int j = 0; j < fieldNames.length; j++) + { + System.out.print( Arrays.toString( reader.document(i).getValues((String) fieldNames[j])) + "\t" ); + } + + System.out.println(); + } + } + + private static void listFields( IndexReader reader ) throws Exception + { + Iterator it = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator(); + + while (it.hasNext()) + { + System.out.println(it.next()); + } + + reader.close(); + } + + private static void usageAndExit() + { + System.out.println("Usage: DumpParallelIndex [-f] index1 ... indexN"); + System.exit(1); + } +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |