Revision: 2345
http://archive-access.svn.sourceforge.net/archive-access/?rev=2345&view=rev
Author: binzino
Date: 2008-06-30 13:38:36 -0700 (Mon, 30 Jun 2008)
Log Message:
-----------
Changed logic so that the RangeQuery has a boost of 0.0f and is always
required. This is necessary for Nutch to auto-convert the RangeQuery
into a RangeFilter.
Added class-level JavaDoc.
Modified Paths:
--------------
trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java
Modified: trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-29 03:07:43 UTC (rev 2344)
+++ trunk/archive-access/projects/nutchwax/archive/src/plugin/query-nutchwax/src/java/org/archive/nutchwax/query/DateQueryFilter.java 2008-06-30 20:38:36 UTC (rev 2345)
@@ -37,7 +37,28 @@
import org.apache.nutch.searcher.QueryFilter;
/**
- *
+ * <p>
+ * Filter on a date or date range. This filter assumes the dates
+ * are in a field named "date" and adhere to the IA 14-digit date
+ * format: YYYYMMDDHHMMSS
+ * </p>
+ * <p>
+ * Date values in the query can have less than the full 14-digit
+ * precision. In that case, they are converted into a range over
+ * the given precision. For example, "date:2007" is automagically
+ * converted into "date[20070000000000-20079999999999]".
+ * </p>
+ * <p>
+ * NOTE: In order for this filter to take advantage of the Nutch
+ * auto-magic conversion of RangeQuery into RangeFilter, we have to
+ * create the RangeQuery with:
+ * <ul>
+ * <li>occur = BooleanClause.Occur.MUST</li>
+ * <li>boost = 0.0f;</li>
+ * </ul>
+ * These are the two conditions that Nutch's LuceneQueryOptimizer
+ * checks before doing a RangeQuery->RangeFilter conversion.
+ * </p>
*/
public class DateQueryFilter implements QueryFilter
{
@@ -122,17 +143,14 @@
return ;
}
+ // Otherwise make it a plain-old TermQuery to match the exact date.
TermQuery term = new TermQuery( new Term( FIELD, date ) );
- // Set boost on term?
- // term.setBoolst( boost );
+ // Not strictly required since this is a TermQuery and not a
+ // RangeQuery, but we use the same 0.0f boost for consistency.
+ term.setBoost( 0.0f );
- output.add( term,
- ( clause.isProhibited()
- ? BooleanClause.Occur.MUST_NOT
- : ( clause.isRequired()
- ? BooleanClause.Occur.MUST
- : BooleanClause.Occur.SHOULD ) ) );
+ output.add( term, BooleanClause.Occur.MUST );
}
private void doRangeQuery( BooleanQuery output, Clause clause, String lower, String upper )
@@ -143,16 +161,11 @@
RangeQuery range = new RangeQuery( new Term( FIELD, lower ),
new Term( FIELD, upper ),
true );
-
- // Set boost on range query?
- // range.setBoost( boost );
- output.add( range,
- ( clause.isProhibited()
- ? BooleanClause.Occur.MUST_NOT
- : ( clause.isRequired()
- ? BooleanClause.Occur.MUST
- : BooleanClause.Occur.SHOULD ) ) );
+ // Required for LuceneQueryOptimizer to convert to RangeFilter.
+ range.setBoost( 0.0f );
+
+ output.add( range, BooleanClause.Occur.MUST );
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|