From: Michael S. <sta...@us...> - 2005-10-06 19:28:50
|
Update of /cvsroot/archive-access/archive-access/projects/nutch/conf In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22427/conf Modified Files: nutch-site.xml.all Log Message: * conf/nutch-site.xml.all Enable indexing of all in the default nutchwax config (.all). Index: nutch-site.xml.all =================================================================== RCS file: /cvsroot/archive-access/archive-access/projects/nutch/conf/nutch-site.xml.all,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** nutch-site.xml.all 6 Oct 2005 01:07:31 -0000 1.2 --- nutch-site.xml.all 6 Oct 2005 19:28:40 -0000 1.3 *************** *** 1,5 **** <?xml version="1.0"?> ! <!-- Internet Archive Nutch configuration --> <nutch-conf> --- 1,7 ---- <?xml version="1.0"?> ! <!--Internet Archive Nutch configuration. ! This config. is what gets built into nutchwax. ! --> <nutch-conf> *************** *** 8,12 **** ! <!-- enable parse-ext --> <property> <name>plugin.includes</name> --- 10,16 ---- ! <!-- Enable parse-ext (parse-ext is a parser that calls the 'ext'ernal program ! xpdf to parse pdf files. Also enable parse-default and the ia plugins. ! --> <property> <name>plugin.includes</name> *************** *** 50,53 **** --- 54,59 ---- </property> + + <!-- For lucene indexes, normally. The default is 128. Write every 1024 entries rather than every 128, the default. *************** *** 78,81 **** --- 84,88 ---- </property> + <!-- make summaries a little longer than the default --> <property> *************** *** 96,110 **** </property> ! <!-- the name of the archive server hosting this archive --> <property> ! <name>archive.host</name> ! <value>crawls.archive.org</value> </property> ! <!-- the name of this archive collection --> <property> <name>archive.collection</name> <value>be05</value> </property> <!--Optionally, hardcode the nutch datadir location rather --- 103,122 ---- </property> ! <!-- the name of the server hosting collections.--> <property> ! <name>collections.host</name> ! <value>collections.example.org</value> </property> ! <!-- The name of this archive collection. ! DEPRECATED. Now search.jsp uses the 'collection' returned by the search ! result drawing up the wayback URL and at index time, use the ! command-line 'collection' option. ! <property> <name>archive.collection</name> <value>be05</value> </property> + --> <!--Optionally, hardcode the nutch datadir location rather *************** *** 121,125 **** <property> <name>archive.index.all</name> ! <value>false</value> </property> </nutch-conf> --- 133,137 ---- <property> <name>archive.index.all</name> ! <value>true</value> </property> </nutch-conf> |