|
From: Eric A. <de...@us...> - 2004-03-25 04:51:20
|
Update of /cvsroot/sprawler/sprawler
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv26241

Modified Files:
    indexer.conf indexer.pl master.pl
Log Message:
- added function from Ilya to check headers for content types
- small bug fixes
- other little stuff

Index: indexer.pl
===================================================================
RCS file: /cvsroot/sprawler/sprawler/indexer.pl,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** indexer.pl 14 Mar 2004 05:54:25 -0000 1.24
--- indexer.pl 25 Mar 2004 04:40:35 -0000 1.25
***************
*** 93,97 ****
  @urlpaths=$client->get("URLPATHS");
  $reindex_interval=$client->get("REINDEX_INTERVAL");
! @index_ext=$client->get("INDEX_EXT");
  $contexts=$client->get("CONTEXTS");
  $cachesize=$client->get("MAXCACHEDSIZE");
--- 93,97 ----
  @urlpaths=$client->get("URLPATHS");
  $reindex_interval=$client->get("REINDEX_INTERVAL");
! @index_ext=$client->get("INDEX_TYPES");
  $contexts=$client->get("CONTEXTS");
  $cachesize=$client->get("MAXCACHEDSIZE");
***************
*** 99,107 ****
! print "index path: $index_path\n" if $debug;
  #print "document paths: @docpaths\n" if $debug;
  #print "url locations: @urlpaths\n" if $debug;
  #print "reindex interval (mins): $reindex_interval\n" if $debug;
! print "indexable extensions: @index_ext\n" if $debug;
  #print "known languages: @languages\n" if $debug;
--- 99,107 ----
! print "Index path: $index_path\n" if $debug;
  #print "document paths: @docpaths\n" if $debug;
  #print "url locations: @urlpaths\n" if $debug;
  #print "reindex interval (mins): $reindex_interval\n" if $debug;
! print "Indexable content types: @index_ext\n" if $debug;
  #print "known languages: @languages\n" if $debug;
***************
*** 129,133 ****
  #print "indexing urls\n";
  foreach $url (@urls) {
! #print "indexing $url\n";
  $client->indexer($url);
  $client->flush_db($url);
--- 129,133 ----
  #print "indexing urls\n";
  foreach $url (@urls) {
! #print "Indexing $url\n";
  $client->indexer($url);
  $client->flush_db($url);
***************
*** 135,139 ****
  };
! print "sending indexes\n";
  opendir(INDEX_PATH,"$index_path");
  @files=grep {! /^\./} readdir(INDEX_PATH);
--- 135,139 ----
  };
! print "Sending indexes\n";
  opendir(INDEX_PATH,"$index_path");
  @files=grep {! /^\./} readdir(INDEX_PATH);

Index: master.pl
===================================================================
RCS file: /cvsroot/sprawler/sprawler/master.pl,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** master.pl 15 Mar 2004 05:15:12 -0000 1.20
--- master.pl 25 Mar 2004 04:40:35 -0000 1.21
***************
*** 1,3 ****
! #!/usr/bin/perl -wT
--- 1,3 ----
! #!/usr/bin/perl -w

Index: indexer.conf
===================================================================
RCS file: /cvsroot/sprawler/sprawler/indexer.conf,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** indexer.conf 10 Mar 2004 05:42:26 -0000 1.7
--- indexer.conf 25 Mar 2004 04:40:35 -0000 1.8
***************
*** 15,19 ****
  # interval in minutes
  reindex_interval = 1440
! index_ext = html
  #index_ext = html,txt
--- 15,19 ----
  # interval in minutes
  reindex_interval = 1440
! INDEX_TYPES = text/html
  #index_ext = html,txt
|