From: <bi...@us...> - 2009-05-05 19:25:06
|
Revision: 2697
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2697&view=rev
Author:   binzino
Date:     2009-05-05 19:24:16 +0000 (Tue, 05 May 2009)

Log Message:
-----------
Fix WAX-41. Added option to use fieldcache or not when handling searches using 'dedup' feature.

Modified Paths:
--------------
    trunk/archive-access/projects/nutchwax/archive/src/nutch/conf/nutch-site.xml
    trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java

Modified: trunk/archive-access/projects/nutchwax/archive/src/nutch/conf/nutch-site.xml
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/nutch/conf/nutch-site.xml 2009-05-05 17:52:47 UTC (rev 2696)
+++ trunk/archive-access/projects/nutchwax/archive/src/nutch/conf/nutch-site.xml 2009-05-05 19:24:16 UTC (rev 2697)
@@ -184,4 +184,9 @@
   <value>80</value>
 </property>
 
+<property>
+  <name>searcher.fieldcache</name>
+  <property>true</property>
+</property>
+
 </configuration>

Modified: trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java 2009-05-05 17:52:47 UTC (rev 2696)
+++ trunk/archive-access/projects/nutchwax/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java 2009-05-05 19:24:16 UTC (rev 2697)
@@ -136,9 +136,9 @@
   private Hits translateHits(TopDocs topDocs,
                              String dedupField, String sortField)
     throws IOException {
-
+
     String[] dedupValues = null;
-    if (dedupField != null)
+    if (dedupField != null && this.conf.getBoolean( "searcher.fieldcache", true ) )
       dedupValues = FieldCache.DEFAULT.getStrings(reader, dedupField);
 
     ScoreDoc[] scoreDocs = topDocs.scoreDocs;
@@ -164,7 +164,33 @@
         }
       }
 
-      String dedupValue = dedupValues == null ? null : dedupValues[doc];
+      String dedupValue = "";
+      if ( dedupValues != null )
+        {
+          dedupValue = dedupValues[doc];
+        }
+      else
+        {
+          if ( "site".equals( dedupField ) )
+            {
+              String exactUrl = reader.document( doc ).get( "exacturl");
+              try
+                {
+                  java.net.URL u = new java.net.URL( exactUrl );
+                  dedupValue = u.getHost();
+
+                  System.out.println("Dedup value hack:" + dedupValue);
+                }
+              catch ( java.net.MalformedURLException e )
+                {
+                  // Eat it.
+                }
+            }
+          else
+            {
+              dedupValue = reader.document( doc ).get( dedupField );
+            }
+        }
 
       hits[i] = new Hit(doc, sortValue, dedupValue);
     }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |