|
From: <buc...@us...> - 2010-03-10 22:43:25
|
Revision: 178
http://devmon.svn.sourceforge.net/devmon/?rev=178&view=rev
Author: buchanmilne
Date: 2010-03-10 22:43:19 +0000 (Wed, 10 Mar 2010)
Log Message:
-----------
Add timeouts for all socket communication
Make some timeouts dependent on the cycle time
Log errors for any socket errors
Clean up sockets and forks better
Modified Paths:
--------------
trunk/modules/dm_snmp.pm
Modified: trunk/modules/dm_snmp.pm
===================================================================
--- trunk/modules/dm_snmp.pm 2010-03-10 22:38:54 UTC (rev 177)
+++ trunk/modules/dm_snmp.pm 2010-03-10 22:43:19 UTC (rev 178)
@@ -210,14 +210,28 @@
my $select = IO::Select->new($g{'forks'}{$fork}{'CS'});
if($select->can_read(0.01)) {
+ do_log("DEBUG SNMP: Fork $fork has data for device $dev, reading it",3) if $g{'debug'};
# Okay, we know we have something in the buffer, keep reading
# till we get an EOF
my $data_in = '';
- do {
- my $read = $g{'forks'}{$fork}{'CS'}->getline();
- if(defined $read and $read ne '') {$data_in .= $read}
- else {select undef, undef, undef, 0.001}
- } until $data_in =~ s/\nEOF\n$//s;
+ eval {
+ local $SIG{ALRM} = sub { die "Timeout waiting for EOF from fork\n" };
+ alarm 15;
+ do {
+ my $read = $g{'forks'}{$fork}{'CS'}->getline();
+ if(defined $read and $read ne '') {$data_in .= $read}
+ else {select undef, undef, undef, 0.001}
+ } until $data_in =~ s/\nEOF\n$//s;
+ alarm 0;
+ };
+ if($@) {
+ do_log("Fork $g{'forks'}{$fork}, pid $g{'forks'}{$fork}{'pid'} stalled on device $dev: $@. Killing this fork.",1);
+ kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending $fork TERM signal failed: $!",2);
+ close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",2);
+ delete $g{'forks'}{$fork};
+ next;
+ }
+ do_log("DEBUG SNMP: Fork $fork returned complete message for device $dev",3) if $g{'debug'};
# Looks like we got some data
my $hashref = thaw($data_in);
@@ -285,6 +299,7 @@
elsif (!kill 0, $pid) {
# Whoops, looks like our fork died somewhow
do_log("Fork $fork ($pid) died polling $dev",0);
+ close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",1);
delete $g{'forks'}{$fork};
--$active_forks;
fork_queries();
@@ -316,7 +331,20 @@
# Now send our input to the fork
my $serialized = nfreeze($snmp_input->{$dev});
- $g{'forks'}{$fork}{'CS'}->print("$serialized\nEOF\n");
+ eval {
+ local $SIG{ALRM} = sub { die "Timeout sending polling task data to fork\n" };
+ alarm 15;
+ $g{'forks'}{$fork}{'CS'}->print("$serialized\nEOF\n");
+ alarm 0;
+ };
+ if($@) {
+ do_log("Fork $g{'forks'}{$fork}, pid $g{'forks'}{$fork}{'pid'} not responding: $@. Killing this fork.",0);
+ kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending TERM signal to fork $fork failed: $!",0);
+ close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",1);
+ delete $g{'forks'}{$fork};
+ next;
+ }
+
++$active_forks;
$g{'forks'}{$fork}{'time'} = time;
}
@@ -486,7 +514,7 @@
# messily and leave us hanging around
eval {
local $SIG{ALRM} = sub { die "Timeout\n" };
- alarm 15;
+ alarm $g{'cycletime'};
$string_in = $sock->getline();
alarm 0;
};
@@ -709,7 +737,11 @@
sub check_forks {
for my $fork (keys %{$g{'forks'}}) {
my $pid = $g{'forks'}{$fork}{'pid'};
- delete $g{'forks'}{$fork} if !kill 0, $pid;
+ if (!kill 0, $pid) {
+ do_log("Fork $fork with pid $pid died, cleaning up",3);
+ close $g{'forks'}{$fork}{'CS'} or do_log("Closing child socket failed: $!",2);
+ delete $g{'forks'}{$fork};
+ }
}
}
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|