From: <bi...@us...> - 2009-06-23 21:17:33
|
Revision: 2742
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2742&view=rev
Author:   binzino
Date:     2009-06-23 21:17:31 +0000 (Tue, 23 Jun 2009)

Log Message:
-----------
Changed getUrl() to getKey() and added code to synthesize the key from the URL and the digest value rather than relying on the "orig" field holding the key. This is to eliminate storing the key explicitly when it can be easily computed, saving space in the index.

Modified Paths:
--------------
    tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/FetchedSegments.java

Modified: tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/FetchedSegments.java
===================================================================
--- tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/FetchedSegments.java	2009-06-23 21:15:29 UTC (rev 2741)
+++ tags/nutchwax-0_12_5/archive/src/nutch/src/java/org/apache/nutch/searcher/FetchedSegments.java	2009-06-23 21:17:31 UTC (rev 2742)
@@ -241,20 +241,20 @@
   }
 
   public byte[] getContent(HitDetails details) throws IOException {
-    return getSegment(details).getContent(getUrl(details));
+    return getSegment(details).getContent(getKey(details));
   }
 
   public ParseData getParseData(HitDetails details) throws IOException {
-    return getSegment(details).getParseData(getUrl(details));
+    return getSegment(details).getParseData(getKey(details));
   }
 
   public long getFetchDate(HitDetails details) throws IOException {
-    return getSegment(details).getCrawlDatum(getUrl(details))
+    return getSegment(details).getCrawlDatum(getKey(details))
       .getFetchTime();
   }
 
   public ParseText getParseText(HitDetails details) throws IOException {
-    return getSegment(details).getParseText(getUrl(details));
+    return getSegment(details).getParseText(getKey(details));
   }
 
   public Summary getSummary(HitDetails details, Query query)
@@ -269,7 +269,7 @@
     {
       try
         {
-          ParseText parseText = segment.getParseText(getUrl(details));
+          ParseText parseText = segment.getParseText(getKey(details));
           text = (parseText != null) ? parseText.getText() : "";
         }
       catch ( Exception e )
@@ -380,11 +380,8 @@
       }
     }
   }
-  private Text getUrl(HitDetails details) {
-    String url = details.getValue("orig");
-    if (StringUtils.isBlank(url)) {
-      url = details.getValue("url");
-    }
+  private Text getKey(HitDetails details) {
+    String url = details.getValue("url") + " " + details.getValue("digest");
     return new Text(url);
   }
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|