|
From: Eric A. <de...@us...> - 2004-03-28 02:00:52
|
Update of /cvsroot/sprawler/sprawler
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv31736

Modified Files:
    indexer.conf indexer.pl master.pl
Log Message:
- Added routines to check for client validity.
- Clients can now only check in url info for urls they themselves have checked out
- Client must "register" first before being able to run.
- Minor bug fixes and tweaks
- some minor whitespace fixing
- Config file changes (subtle)
- Uses a default pre-registered clientid, but in the future, users will have to register their own.

Index: indexer.pl
===================================================================
RCS file: /cvsroot/sprawler/sprawler/indexer.pl,v
retrieving revision 1.26
retrieving revision 1.27
diff -C2 -d -r1.26 -r1.27
*** indexer.pl 25 Mar 2004 05:44:03 -0000 1.26
--- indexer.pl 28 Mar 2004 01:49:38 -0000 1.27
***************
*** 130,136 ****
  $totalurls=@urls;
  $| = 1;
! $k=0;
  foreach $url (@urls) {
! print "\rIndexing url $k of $totalurls";
  $client->indexer($url);
  $client->flush_db($url);
--- 130,136 ----
  $totalurls=@urls;
  $| = 1;
! $k=1;
  foreach $url (@urls) {
! print "\rIndexing url $k of $totalurls ";
  $client->indexer($url);
  $client->flush_db($url);
***************
*** 138,161 ****
  $k++;
  };
! print " -> Done.\n";
  opendir(INDEX_PATH,"$index_path");
! @files=grep {! /^\./} readdir(INDEX_PATH);
! #my $socket=$client->connect($hostname);
  my $totalfiles=@files;
  $| = 1;
! $j = 0;
  foreach my $file (@files) {
! print "\rSending index $j of $totalfiles";
  if($file =~ /[a-f0-9]{32}\.db/i) {
  $filetosend=$file;
  my $socket=$client->connect($hostname,$port);
! $client->send_index($filetosend,$socket);
! unlink("$index_path$filetosend");
! $socket->close;
  }
  $j++;
  }
! print " -> Done.\n";
  $| = 0;
--- 138,164 ----
  $k++;
  };
! print "-> Done.\n";
  opendir(INDEX_PATH,"$index_path");
! #@files=grep {! /^\./} readdir(INDEX_PATH);
! @files=grep {/[a-f0-9]{32}\.db/} readdir(INDEX_PATH);
  my $totalfiles=@files;
  $| = 1;
! $j = 1;
  foreach my $file (@files) {
! print "\rSending index $j of $totalfiles ";
  if($file =~ /[a-f0-9]{32}\.db/i) {
  $filetosend=$file;
  my $socket=$client->connect($hostname,$port);
! if ($client->send_index($filetosend,$socket)) {
! unlink("$index_path$filetosend");
! $socket->close;
! } else {
! print "\nERROR: sending file $filetosend.\n";
! }
  }
  $j++;
  }
! print "-> Done.\n";
  $| = 0;

Index: master.pl
===================================================================
RCS file: /cvsroot/sprawler/sprawler/master.pl,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** master.pl 25 Mar 2004 04:40:35 -0000 1.21
--- master.pl 28 Mar 2004 01:49:40 -0000 1.22
***************
*** 66,70 ****
  ########################################################
! $master->seed_urls();$master->logf("\n\n\nseeding urls...\n");
--- 66,71 ----
  ########################################################
! $master->seed_urls();
! $master->logf("\n\n\nseeding urls...\n");
***************
*** 117,140 ****
  close PARENT;
- #(my $parent_id, my $cc)=split(/\|/,$tmp);
- #print "TMP $parent_id <> $cc\n";
  my $action=$master->receive_request($socket,1024);
! if ($action =~ /^REQUEST_URL\s+(\d+)/) {
! my $qtyurls=$1;
! if ($max_req_urls < $qtyurls) { $qtyurls = $max_req_urls; }
! $action="";
! my @urls=$master->get_urls($qtyurls);
! $master->logf("($line) child $child_id sending urls...\n");
! $master->send_urls($socket,\@urls);
! $master->logf("ok\n");
! # now we should mark the urls as "out for indexing", and save into hash
! #
! } elsif ($action eq "SEND_INDEX") {
! $action="";
! $master->logf("($line) child $child_id recieving indexes...\n");
! $master->receive_index($socket);
! $master->logf("ok\n");
  } else {
! $master->logf("child recieved request $action\n");
  }
--- 118,149 ----
  close PARENT;
  my $action=$master->receive_request($socket,1024);
! if ($action =~ /^REQUEST_URL\s+(\d+)\s+(\S+)/) {
! my $qtyurls=$1;
! my $clientid=$2;
! my $retval=$master->check_clientid($clientid,"STATUS");
! if ($retval eq "1") {
! if ($max_req_urls < $qtyurls) { $qtyurls = $max_req_urls; }
! $action="";
! my @urls=$master->get_urls($qtyurls,$clientid);
! $master->logf("($line) child $child_id sending urls...\n");
! $master->send_urls($socket,\@urls);
! $master->logf("ok\n");
! } else {
! print STDERR "Client $clientid attempted to steal from us!\n";
! }
! } elsif ($action =~ /^SEND_INDEX\s+(\S+)/) {
! my $clientid = $1;
! my $retval=$master->check_clientid($clientid,"STATUS");
! if ($retval eq "1") {
! $action="";
! $master->logf("($line) child $child_id receiving indexes...\n");
! $master->receive_index($socket,$clientid);
! $master->logf("ok\n");
! } else {
! print STDERR "Client $clientid attempted to trick us!\n";
! }
  } else {
! $master->logf("child received request $action\n");
  }
***************
*** 151,155 ****
  my $child;
  while ((my $waitedpid = waitpid(-1,WNOHANG)) > 0) {
! logmsg "reaped $waitedpid" . ($? ? " with exit $?" : '');
  }
  $SIG{CHLD} = \&REAPER; # loathe sysV
--- 160,164 ----
  my $child;
  while ((my $waitedpid = waitpid(-1,WNOHANG)) > 0) {
! #logmsg "reaped $waitedpid" . ($? ? " with exit $?" : '');
  }
  $SIG{CHLD} = \&REAPER; # loathe sysV

Index: indexer.conf
===================================================================
RCS file: /cvsroot/sprawler/sprawler/indexer.conf,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** indexer.conf 25 Mar 2004 04:40:35 -0000 1.8
--- indexer.conf 28 Mar 2004 01:49:38 -0000 1.9
***************
*** 9,13 ****
  URLS_TO_INDEX = 20
! CLIENT_ID = TESTER1
  DEFAULT_SERVER = beta.sprawler.com
  DEFAULT_SERVER_PORT = 5555
--- 9,13 ----
  URLS_TO_INDEX = 20
! CLIENT_ID = tes...@sp...-1031080407379
  DEFAULT_SERVER = beta.sprawler.com
  DEFAULT_SERVER_PORT = 5555
***************
*** 15,19 ****
  # interval in minutes
  reindex_interval = 1440
! INDEX_TYPES = text/html
! #index_ext = html,txt
--- 15,18 ----
  # interval in minutes
  reindex_interval = 1440
! INDEX_TYPES = text/html text/plain
|