From: <bi...@us...> - 2009-07-07 22:07:25
|
Revision: 2752
http://archive-access.svn.sourceforge.net/archive-access/?rev=2752&view=rev
Author: binzino
Date: 2009-07-07 22:07:17 +0000 (Tue, 07 Jul 2009)

Log Message:
-----------
WAX-52. Added -d <dir> option to NutchWaxBean. Also added commands for index merging and searching to the 'nutchwax' script.

Modified Paths:
--------------
tags/nutchwax-0_12_6/archive/bin/nutchwax
tags/nutchwax-0_12_6/archive/src/java/org/archive/nutchwax/NutchWaxBean.java

Modified: tags/nutchwax-0_12_6/archive/bin/nutchwax
===================================================================
--- tags/nutchwax-0_12_6/archive/bin/nutchwax 2009-07-07 21:53:03 UTC (rev 2751)
+++ tags/nutchwax-0_12_6/archive/bin/nutchwax 2009-07-07 22:07:17 UTC (rev 2752)
@@ -50,22 +50,30 @@
 shift
 ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.PageRankDbMerger $@
 ;;
+ pageranker)
+ shift
+ ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.PageRanker $@
+ ;;
+ parsetextmerger)
+ shift
+ ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.ParseTextCombiner $@
+ ;;
 add-dates)
 shift
 ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.DateAdder $@
 ;;
+ merge)
+ shift
+ ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.IndexMerger $@
+ ;;
 dumpindex)
 shift
 ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.DumpParallelIndex $@
 ;;
- pageranker)
+ search)
 shift
- ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.PageRanker $@
+ ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.NutchWaxBean $@
 ;;
- parsetextmerger)
- shift
- ${NUTCH_HOME}/bin/nutch org.archive.nutchwax.tools.ParseTextCombiner $@
- ;;
 *)
 echo ""
 echo "Usage: nutchwax COMMAND"
@@ -76,7 +84,9 @@
 echo " pageranker Generate pagerank.txt file from 'pagerankdb's or 'linkdb's"
 echo " parsetextmerger Merge segement parse_text/part-nnnnn directories."
 echo " add-dates Add dates to a parallel index"
+ echo " merge Merge indexes or parallel indexes"
 echo " dumpindex Dump an index or set of parallel indices to stdout"
+ echo " search Query a search index"
 echo ""
 exit 1
 ;;

Modified: tags/nutchwax-0_12_6/archive/src/java/org/archive/nutchwax/NutchWaxBean.java
===================================================================
--- tags/nutchwax-0_12_6/archive/src/java/org/archive/nutchwax/NutchWaxBean.java 2009-07-07 21:53:03 UTC (rev 2751)
+++ tags/nutchwax-0_12_6/archive/src/java/org/archive/nutchwax/NutchWaxBean.java 2009-07-07 22:07:17 UTC (rev 2752)
@@ -254,6 +254,7 @@
 String usage = "NutchWaxBean [options] query"
 + "\n\t-h <n> Hits per site"
 + "\n\t-n <n> Number of results to find"
+ + "\n\t-d <dir> Search directory"
 + "\n";
 if ( args.length == 0 )
@@ -263,6 +264,7 @@
 }
 String queryString = args[args.length - 1];
+ String searchDir = null;
 int hitsPerSite = 0;
 int numHits = 10;
 for ( int i = 0 ; i < args.length - 1 ; i++ )
@@ -279,6 +281,11 @@
 i++;
 numHits = Integer.parseInt( args[i] );
 }
+ if ( "-d".equals( args[i] ) )
+ {
+ i++;
+ searchDir = args[i];
+ }
 }
 catch ( NumberFormatException nfe )
 {
@@ -290,9 +297,15 @@
 Configuration conf = NutchConfiguration.create();
+ if ( searchDir != null )
+ {
+ conf.set( "searcher.dir", searchDir );
+ }
 NutchBean bean = new NutchBean(conf);
 NutchBeanModifier.modify( bean );
+ System.out.println( "Searching in directory: " + conf.get( "searcher.dir" ) );
+
 Query query = Query.parse(queryString, conf);
 System.out.println("Hits per site: " + hitsPerSite);
 Hits hits = bean.search(query, numHits, hitsPerSite);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|