Revision: 2345 http://archive-access.svn.sourceforge.net/archive-access/?rev=2345&view=rev Author: binzino Date: 2008-06-30 13:38:36 -0700 (Mon, 30 Jun 2008) Log Message: ----------- Changed logic so that the RangeQuery has a boost of 0.0f and is always required. This is necessary for Nutch to auto-convert the RangeQuery into a RangeFilter. Added class-level JavaDoc. Modified Paths: -------------- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java Modified: trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java =================================================================== --- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-29 03:07:43 UTC (rev 2344) +++ trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-30 20:38:36 UTC (rev 2345) @@ -37,7 +37,28 @@ import org.apache.nutch.searcher.QueryFilter; /** - * + * <p> + * Filter on a date or date range. This filter assumes the dates + * are in field named "date" and adhere to the IA 14-digit date + * format: YYYYMMDDHHMMSS + * </p> + * <p> + * Date values in the query can have less than the full 14-digit + * precision. In that case, they are converted into a range over + * given precision. For example, "date:2007" is automagically + * converted into "date[20070000000000-20079999999999]". + * </p> + * <p> + * NOTE: In order for this filter to take advantage of the Nutch + * auto-magic conversion of RangeQuery into RangeFilter, we have to + * create the RangeQuery with: + * <ul> + * <li>occur = BooleanClause.Occur.MUST</li> + * <li>boost = 0.0f;</li> + * </ul> + * These are the two conditions that Nutch's LuceneQueryOptimizer + * checks before doing a RangeQuery->RangeFilter conversion. 
+ * </p> */ public class DateQueryFilter implements QueryFilter { @@ -122,17 +143,14 @@ return ; } + // Otherwise make it plain-old TermQuery to match the exact date. TermQuery term = new TermQuery( new Term( FIELD, date ) ); - // Set boost on term? - // term.setBoolst( boost ); + // Not strictly required since this is a TermQuery and not a + // RangeQuery, but we use the same 0.0f boost for consistency. + term.setBoost( 0.0f ); - output.add( term, - ( clause.isProhibited() - ? BooleanClause.Occur.MUST_NOT - : ( clause.isRequired() - ? BooleanClause.Occur.MUST - : BooleanClause.Occur.SHOULD ) ) ); + output.add( term, BooleanClause.Occur.MUST ); } private void doRangeQuery( BooleanQuery output, Clause clause, String lower, String upper ) @@ -143,16 +161,11 @@ RangeQuery range = new RangeQuery( new Term( FIELD, lower ), new Term( FIELD, upper ), true ); - - // Set boost on range query? - // range.setBoost( boost ); - output.add( range, - ( clause.isProhibited() - ? BooleanClause.Occur.MUST_NOT - : ( clause.isRequired() - ? BooleanClause.Occur.MUST - : BooleanClause.Occur.SHOULD ) ) ); + // Required for LuceneQueryOptimizer to convert to RangeFilter. + range.setBoost( 0.0f ); + + output.add( range, BooleanClause.Occur.MUST ); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |