From: <bi...@us...> - 2008-06-26 22:35:23
|
Revision: 2331
          http://archive-access.svn.sourceforge.net/archive-access/?rev=2331&view=rev
Author:   binzino
Date:     2008-06-26 15:35:33 -0700 (Thu, 26 Jun 2008)

Log Message:
-----------
Add WaybackURLFilter configuration.
Add archive-digest field to indexing and query plugins configurations.

Modified Paths:
--------------
    trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml

Modified: trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml
===================================================================
--- trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml 2008-06-26 22:34:24 UTC (rev 2330)
+++ trunk/archive-access/projects/nutchwax/archive/conf/nutch-site.xml 2008-06-26 22:35:33 UTC (rev 2331)
@@ -10,7 +10,7 @@
   <!-- Add 'index-nutchwax' and 'query-nutchwax' to plugin list. -->
   <!-- Also, add 'parse-pdf' -->
   <!-- Remove 'urlfilter-regex' and 'normalizer-(pass|regex|basic)' -->
-  <value>protocol-http|parse-(text|html|js|pdf)|index-(basic|anchor|nutchwax)|query-(basic|site|url|nutchwax)|summary-basic|scoring-opic</value>
+  <value>protocol-http|parse-(text|html|js|pdf)|index-(basic|anchor|nutchwax)|query-(basic|site|url|nutchwax)|summary-basic|scoring-opic|urlfilter-nutchwax</value>
 </property>
 
 <property>
@@ -24,6 +24,7 @@
   -->
   <name>nutchwax.filter.index</name>
   <value>
+    archive-digest:false
     arcname:false
     collection
     date
@@ -45,6 +46,7 @@
   <!-- We do *not* use this filter for handling "date" queries, there is a specific filter for that: DateQueryFilter -->
   <name>nutchwax.filter.query</name>
   <value>
+    raw:archive-digest:false
     raw:arcname:false
     group:collection
     group:type
@@ -62,8 +64,19 @@
 <property>
   <name>mime.type.magic</name>
   <value>false</value>
-  <description>Defines if the mime content type detector uses magic resolution.
-  </description>
+  <description>Defines if the mime content type detector uses magic resolution.</description>
 </property>
 
+<property>
+  <name>nutchwax.urlfilter.wayback.exclusions</name>
+  <value></value>
+  <description>Path to file containing list of exclusions.</description>
+</property>
+
+<property>
+  <name>nutchwax.urlfilter.wayback.canonicalizer</name>
+  <value>org.archive.wayback.util.url.AggressiveUrlCanonicalizer</value>
+  <description></description>
+</property>
+
 </configuration>


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|