Update of /cvsroot/archive-access/archive-access/projects/nutch/conf
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22427/conf
Modified Files:
nutch-site.xml.all
Log Message:
* conf/nutch-site.xml.all
Enable indexing of all (set archive.index.all to true) in the default nutchwax config (.all).
Index: nutch-site.xml.all
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/conf/nutch-site.xml.all,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** nutch-site.xml.all 6 Oct 2005 01:07:31 -0000 1.2
--- nutch-site.xml.all 6 Oct 2005 19:28:40 -0000 1.3
***************
*** 1,5 ****
<?xml version="1.0"?>
! <!-- Internet Archive Nutch configuration -->
<nutch-conf>
--- 1,7 ----
<?xml version="1.0"?>
! <!--Internet Archive Nutch configuration.
! This config. is what gets built into nutchwax.
! -->
<nutch-conf>
***************
*** 8,12 ****
! <!-- enable parse-ext -->
<property>
<name>plugin.includes</name>
--- 10,16 ----
! <!-- Enable parse-ext (parse-ext is a parser that calls the 'ext'ernal program
! xpdf to parse pdf files). Also enable parse-default and the ia plugins.
! -->
<property>
<name>plugin.includes</name>
***************
*** 50,53 ****
--- 54,59 ----
</property>
+
+
<!-- For lucene indexes, normally. The default is 128.
Write every 1024 entries rather than every 128, the default.
***************
*** 78,81 ****
--- 84,88 ----
</property>
+
<!-- make summaries a little longer than the default -->
<property>
***************
*** 96,110 ****
</property>
! <!-- the name of the archive server hosting this archive -->
<property>
! <name>archive.host</name>
! <value>crawls.archive.org</value>
</property>
! <!-- the name of this archive collection -->
<property>
<name>archive.collection</name>
<value>be05</value>
</property>
<!--Optionally, hardcode the nutch datadir location rather
--- 103,122 ----
</property>
! <!-- the name of the server hosting collections.-->
<property>
! <name>collections.host</name>
! <value>collections.example.org</value>
</property>
! <!-- The name of this archive collection.
! DEPRECATED. search.jsp now uses the 'collection' returned by the search
! result when drawing up the wayback URL; at index time, use the
! command-line 'collection' option instead.
!
<property>
<name>archive.collection</name>
<value>be05</value>
</property>
+ -->
<!--Optionally, hardcode the nutch datadir location rather
***************
*** 121,125 ****
<property>
<name>archive.index.all</name>
! <value>false</value>
</property>
</nutch-conf>
--- 133,137 ----
<property>
<name>archive.index.all</name>
! <value>true</value>
</property>
</nutch-conf>
|