From: <bi...@us...> - 2009-06-25 20:21:14
|
Revision: 2744
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2744&view=rev
Author:   binzino
Date:     2009-06-25 20:21:06 +0000 (Thu, 25 Jun 2009)

Log Message:
-----------
WAX-47: Use 'url' field rather than 'exacturl' field as the former will (should) always be present whereas the latter may not.

Modified Paths:
--------------
    tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java

Modified: tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java
===================================================================
--- tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java 2009-06-23 21:35:00 UTC (rev 2743)
+++ tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/IndexSearcher.java 2009-06-25 20:21:06 UTC (rev 2744)
@@ -173,10 +173,10 @@
 {
 if ( "site".equals( dedupField ) )
 {
- String exactUrl = reader.document( doc ).get( "exacturl");
+ String url = reader.document( doc ).get( "url");
 try
 {
- java.net.URL u = new java.net.URL( exactUrl );
+ java.net.URL u = new java.net.URL( url );
 dedupValue = u.getHost();
 System.out.println("Dedup value hack:" + dedupValue);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|