Update of /cvsroot/archive-access/archive-access/projects/nutch/conf
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27408/conf
Modified Files:
Tag: mapred
nutch-site.xml
Log Message:
Pre-au fixes.
Index: nutch-site.xml
===================================================================
RCS file: /cvsroot/archive-access/archive-access/projects/nutch/conf/nutch-site.xml,v
retrieving revision 1.24.2.3
retrieving revision 1.24.2.4
diff -C2 -d -r1.24.2.3 -r1.24.2.4
*** nutch-site.xml 1 Sep 2005 18:45:29 -0000 1.24.2.3
--- nutch-site.xml 20 Oct 2005 23:30:48 -0000 1.24.2.4
***************
*** 9,83 ****
<!-- <property> -->
<!-- <name>fs.default.name</name> -->
! <!-- <value>ia109102:8009</value> -->
<!-- </property> -->
- <property>
- <name>ndfs.name.dir</name>
- <value>/0/nutch/ndfs/names</value>
- </property>
-
- <property>
- <name>ndfs.data.dir</name>
- <value>/0/nutch/ndfs/doug,/1/nutch/ndfs/doug</value>
- </property>
-
- <property>
- <name>ndfs.replication</name>
- <value>2</value>
- </property>
-
<!-- MapReduce -->
<!-- <property> -->
<!-- <name>mapred.job.tracker</name> -->
! <!-- <value>ia109102:8010</value> -->
! <!-- </property> -->
!
! <!-- <property> -->
! <!-- <name>mapred.job.tracker.info.port</name> -->
! <!-- <value>7846</value> -->
! <!-- </property> -->
!
! <!-- <property> -->
! <!-- <name>mapred.local.dir</name> -->
! <!-- <value>/0/nutch/mapred/local</value> -->
! <!-- </property> -->
!
! <!-- <property> -->
! <!-- <name>mapred.system.dir</name> -->
! <!-- <value>/mapred/system</value> -->
! <!-- </property> -->
!
! <!-- <property> -->
! <!-- <name>mapred.task.timeout</name> -->
! <!-- <value>3600000</value> -->
<!-- </property> -->
<!-- Override a few Nutch defaults -->
-
- <!-- Enable parse-ext (parse-ext is a parser that calls the 'ext'ernal program
- xpdf to parse pdf files. Also enable parse-default and the ia plugins.
- -->
<property>
! <name>plugin.includes</name>
! <value>urlfilter-regex|parse-(text|html|ext|default)|index-(basic|ia)|query-(basic|site|url|ia)</value>
</property>
! <!-- keep all links, not just inter-host -->
! <!-- db updates will be FASTER if set to true.
! Downside is that link text from same site won't be included.
! (More valuable to take anchor text from other hosts). Use this
! if wide variety of sites to index.
! -->
<property>
! <name>db.ignore.internal.links</name>
! <value>false</value>
</property>
- <!-- use in-degree as poor-man's link analysis -->
<property>
! <name>indexer.boost.by.link.count</name>
! <value>true</value>
</property>
--- 9,38 ----
<!-- <property> -->
<!-- <name>fs.default.name</name> -->
! <!-- <value>ia109102.archive.org:8009</value> -->
<!-- </property> -->
<!-- MapReduce -->
<!-- <property> -->
<!-- <name>mapred.job.tracker</name> -->
! <!-- <value>ia109102.archive.org:8010</value> -->
<!-- </property> -->
<!-- Override a few Nutch defaults -->
<property>
! <name>archive.collection</name>
! <value>au</value>
</property>
! <!-- the name of the archive server hosting this archive -->
<property>
! <name>archive.host</name>
! <value>crawls.archive.org</value>
</property>
<property>
! <name>plugin.includes</name>
! <value>urlfilter-regex|parse-(text|html|js|ext)|index-(basic|ia)|query-(basic|site|url|ia)</value>
</property>
***************
*** 132,160 ****
</property>
- <!-- the name of the archive server hosting this archive -->
- <property>
- <name>archive.host</name>
- <value>crawls.archive.org</value>
- </property>
-
- <!-- The name of this archive collection.
- DEPRECATED. Now search.jsp uses the 'collection' returned by the search
- result drawing up the wayback URL and at index time, use the
- command-line 'collection' option.
-
- <property>
- <name>archive.collection</name>
- <value>be05</value>
- </property>
- -->
-
- <!--Optionally, hardcode the nutch datadir location rather
- than rely on tomcat startup location.
- <property>
- <name>searcher.dir</name>
- <value>/home/stack/workspace/nutch-datadir</value>
- </property>
- -->
-
<!--If set to true, all contenttypes are indexed. Otherwise we only
index text/* and application/*
--- 87,90 ----
***************
*** 164,166 ****
--- 94,97 ----
<value>false</value>
</property>
+
</nutch-conf>