From: <buc...@us...> - 2009-01-09 14:34:08
|
Revision: 113 http://devmon.svn.sourceforge.net/devmon/?rev=113&view=rev Author: buchanmilne Date: 2009-01-09 14:34:00 +0000 (Fri, 09 Jan 2009) Log Message: ----------- Set a timeout on the socket to hobbit (hardcoded to 10s for now) Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2009-01-09 13:08:17 UTC (rev 112) +++ trunk/modules/dm_snmp.pm 2009-01-09 14:34:00 UTC (rev 113) @@ -66,7 +66,8 @@ my $sock = IO::Socket::INET->new ( PeerAddr => $g{'dispserv'}, PeerPort => $g{'dispport'}, - Proto => 'tcp' + Proto => 'tcp', + Timeout => 10, ); if(defined $sock) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2009-01-23 10:16:14
|
Revision: 122 http://devmon.svn.sourceforge.net/devmon/?rev=122&view=rev Author: buchanmilne Date: 2009-01-23 10:16:06 +0000 (Fri, 23 Jan 2009) Log Message: ----------- Filters in hobbitdboard are regexes, anchor the conn text (Simeon Berkley) Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2009-01-23 09:15:00 UTC (rev 121) +++ trunk/modules/dm_snmp.pm 2009-01-23 10:16:06 UTC (rev 122) @@ -71,7 +71,7 @@ ); if(defined $sock) { - print $sock "hobbitdboard test=conn field=hostname,color"; + print $sock "hobbitdboard test=^conn$ field=hostname,color"; shutdown($sock, 1); while(<$sock>) { my ($device,$test,$color) = split /\|/; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2009-01-23 16:16:18
|
Revision: 124 http://devmon.svn.sourceforge.net/devmon/?rev=124&view=rev Author: buchanmilne Date: 2009-01-23 16:16:10 +0000 (Fri, 23 Jan 2009) Log Message: ----------- Fix typo Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2009-01-23 15:36:41 UTC (rev 123) +++ trunk/modules/dm_snmp.pm 2009-01-23 16:16:10 UTC (rev 124) @@ -71,7 +71,7 @@ ); if(defined $sock) { - print $sock "hobbitdboard test=^conn$ field=hostname,color"; + print $sock "hobbitdboard test=^conn\$ field=hostname,color"; shutdown($sock, 1); while(<$sock>) { my ($device,$test,$color) = split /\|/; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <dba...@us...> - 2009-09-11 07:06:44
|
Revision: 145 http://devmon.svn.sourceforge.net/devmon/?rev=145&view=rev Author: dbaldwin Date: 2009-09-11 07:06:33 +0000 (Fri, 11 Sep 2009) Log Message: ----------- dm_snmp.pm fixes - SNMP max PDU size increased - ignore more failing repeaters (increase to 6 from 2) to make some templates more tolerant of missing values - check conn status for host, detect real green status even if conn test blue (disabled) Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2009-09-11 06:30:41 UTC (rev 144) +++ trunk/modules/dm_snmp.pm 2009-09-11 07:06:33 UTC (rev 145) @@ -36,6 +36,7 @@ use vars qw(%g); *g = \%dm_config::g; + my $max_pdu_len = 16384; # default is 8000 # Set some of our global SNMP variables $BER::pretty_print_timeticks = 0; $SNMP_Session::suppress_warnings = 0; @@ -71,11 +72,13 @@ ); if(defined $sock) { - print $sock "hobbitdboard test=^conn\$ field=hostname,color"; + print $sock "hobbitdboard test=^conn\$ fields=hostname,color,line1"; shutdown($sock, 1); while(<$sock>) { - my ($device,$test,$color) = split /\|/; - $g{'hobbit_color'}{$device} = $color; + my ($device,$color,$line1) = split /\|/; + my ($l1col) = ($line1 =~ /^(\w+)/); + do_log("$device has hobbit status $color ($l1col)") if $g{debug}; + $g{'hobbit_color'}{$device} = $color ne "blue" && $color || $l1col; } } } @@ -434,10 +437,10 @@ next DEVICE; } elsif($snmp_ver eq '1') { - $session = SNMPv1_Session->open($host, $snmp_cid, $snmp_port); + $session = SNMPv1_Session->open($host, $snmp_cid, $snmp_port,$max_pdu_len); } elsif($snmp_ver =~ /^2c?$/) { - $session = SNMPv2c_Session->open($host, $snmp_cid, $snmp_port); + $session = SNMPv2c_Session->open($host, $snmp_cid, $snmp_port,$max_pdu_len); $session->{'use_getbulk'} = 1; } @@ -563,7 +566,7 @@ do_log("DEBUG SNMP: Failed queries $failed_query",0) if $g{'debug'}; # We dont want to do every table if we are failing alot of 
walks - if($failed_query > 2) { + if($failed_query > 6) { my $error_str = "Failed too many queries on $dev, aborting query"; $data_out{'error'}{$error_str} = 1; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2009-10-26 13:49:53
|
Revision: 156 http://devmon.svn.sourceforge.net/devmon/?rev=156&view=rev Author: buchanmilne Date: 2009-10-26 12:34:38 +0000 (Mon, 26 Oct 2009) Log Message: ----------- Ignore SNMP failures where the snmp error string is empty. This allows us, in the case where we get no data for one test (e.g., MIB is not supported/configured) to keep polling the other tests. Tests for which there is no data will then appear clear, while tests for which there is data will not (without this change, tests for which there is data available would appear clear, and finding the test with missing data was non-trivial). Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2009-10-26 11:59:58 UTC (rev 155) +++ trunk/modules/dm_snmp.pm 2009-10-26 12:34:38 UTC (rev 156) @@ -554,10 +554,12 @@ my $snmp_err; do_log("DEBUG SNMP: $SNMP_Session::errmsg",0) if $g{'debug'}; ($snmp_err = $SNMP_Session::errmsg) =~ s/\n.*//s; - my $error_str = - "Error walking $oid for $dev ($snmp_err)"; - $data_out{'error'}{$error_str} = 0; - ++$failed_query; + if ($snmp_err ne '') { + my $error_str = + "Error walking $oid for $dev ($snmp_err)"; + $data_out{'error'}{$error_str} = 0; + ++$failed_query; + } } else { # Record our maxrep value for our next poll cycle This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:03:22
|
Revision: 172 http://devmon.svn.sourceforge.net/devmon/?rev=172&view=rev Author: buchanmilne Date: 2010-03-10 22:03:12 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Clean up some logging, including identifying the affected fork if applicable Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 21:13:24 UTC (rev 171) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:03:12 UTC (rev 172) @@ -77,7 +77,7 @@ while(<$sock>) { my ($device,$color,$line1) = split /\|/; my ($l1col) = ($line1 =~ /^(\w+)/); - do_log("$device has hobbit status $color ($l1col)") if $g{debug}; + do_log("DEBUG SNMP: $device has hobbit status $color ($l1col)",2) if $g{debug}; $g{'hobbit_color'}{$device} = $color ne "blue" && $color || $l1col; } } @@ -260,10 +260,10 @@ my $pid = $g{'forks'}{$fork}{'pid'}; # See if we've exceeded our max poll time if((time - $g{'forks'}{$fork}{'time'}) > $g{'maxpolltime'}) { - do_log("Fork $fork ($pid) exceeded poll time polling $dev",0); + do_log("WARNING: Fork $fork ($pid) exceeded poll time polling $dev",0); # Kill it - do_log("Fork $fork ($pid) exceeded poll time polling $dev",0); - kill 15, $pid; + kill 15, $pid or do_log("WARNING: Sending fork $fork TERM signal failed: $!",0); + close $g{'forks'}{$fork}{'CS'} or do_log("WARNING: Closing socket to fork $fork failed: $!",1); delete $g{'forks'}{$fork}; --$active_forks; fork_queries(); @@ -475,9 +475,9 @@ my $oids_num = keys %{$data_in{'nonreps'}}; my $ii = 0; - do_log("DEBUG SNMP: $oids_num",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): $oids_num",0) if $g{'debug'}; for my $oid (keys %{$data_in{'nonreps'}}) { - do_log("DEBUG SNMP: $ii => $oid ",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): $ii => $oid ",0) if $g{'debug'}; $ii++; push @nrep_oids_my, $oid; push @nrep_oids, encode_oid(split /\./, $oid); @@ -488,17 +488,17 @@ for (my $index = 0; $index < $oids_num; 
$index++) { ++$nrep_oids_temp_cpt; push @nrep_oids_temp, $nrep_oids[$index]; -do_log("DEBUG SNMP: Adding ID => $nrep_oids_temp_cpt OID =>$nrep_oids_my[$index]",0) if $g{'debug'}; +do_log("DEBUG SNMP($fork_num): Adding ID => $nrep_oids_temp_cpt OID =>$nrep_oids_my[$index]",0) if $g{'debug'}; #if ($nrep_oids_temp_cpt == 10) { - do_log("DEBUG SNMP: Pooling $nrep_oids_temp_cpt oids",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): Pooling $nrep_oids_temp_cpt oids",0) if $g{'debug'}; if(@nrep_oids_temp) { if($session->get_request_response(@nrep_oids_temp)) { my $response = $session->pdu_buffer; my ($bindings) = $session->decode_get_response($response); if(!defined $bindings or $bindings eq '') { my $snmp_err; - do_log("DEBUG SNMP: $SNMP_Session::errmsg",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): $SNMP_Session::errmsg",0) if $g{'debug'}; ($snmp_err = $SNMP_Session::errmsg) =~ s/\n.*//s; my $error_str = "snmpget $dev ($snmp_err)"; $data_out{'error'}{$error_str} = 0; @@ -552,7 +552,7 @@ # Catch any failures if(!defined $num_reps or $num_reps == 0) { my $snmp_err; - do_log("DEBUG SNMP: $SNMP_Session::errmsg",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): $SNMP_Session::errmsg",0) if $g{'debug'}; ($snmp_err = $SNMP_Session::errmsg) =~ s/\n.*//s; if ($snmp_err ne '') { my $error_str = @@ -566,7 +566,7 @@ $data_out{'maxrep'}{$oid} = $num_reps + 1; } - do_log("DEBUG SNMP: Failed queries $failed_query",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): Failed queries $failed_query",0) if $g{'debug'}; # We dont want to do every table if we are failing alot of walks if($failed_query > 6) { my $error_str = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:13:40
|
Revision: 173 http://devmon.svn.sourceforge.net/devmon/?rev=173&view=rev Author: buchanmilne Date: 2010-03-10 22:13:34 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Maintain per-fork polled device counter Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:03:12 UTC (rev 172) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:13:34 UTC (rev 173) @@ -50,6 +50,10 @@ # Sub that, given a hash of device data, will query specified oids for # each device and return a hash of the snmp query results sub poll_devices { + # clear per-fork polled device counters + foreach (keys %{$g{'forks'}} ) { + $g{'forks'}{$_}{'polled'} = 0; + } do_log("DEBUG SNMP: running poll_devices()",0) if $g{'debug'}; do_log("Starting snmp queries",1); @@ -224,6 +228,8 @@ # If we got good data, reset the fail counter to 0 $g{'fail'}{$dev} = 0; + # increment the per-fork polled device counter + $g{'forks'}{$fork}{'polled'}++; } else { print "failed thaw on $dev\n"; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:24:36
|
Revision: 175 http://devmon.svn.sourceforge.net/devmon/?rev=175&view=rev Author: buchanmilne Date: 2010-03-10 22:24:30 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Process forks in numerical order instead of hash order Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:17:50 UTC (rev 174) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:24:30 UTC (rev 175) @@ -200,7 +200,7 @@ my @devices = keys %{$snmp_input}; while(@devices or $active_forks) { - for my $fork (1 .. $g{'numforks'}) { + foreach my $fork (sort {$a <=> $b} keys %{$g{'forks'}}) { # First lets see if our fork is working on a device if(defined $g{'forks'}{$fork}{'dev'}) { @@ -337,8 +337,9 @@ my $pid; # Find our next available placeholder - for (keys %{$g{'forks'}}) + for (sort {$a <=> $b} keys %{$g{'forks'}}) {++$num and next if defined $g{'forks'}{$num}; last} + do_log("Starting fork number $num") if $g{'debug'}; # Open up our communication sockets socketpair( This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:28:12
|
Revision: 176 http://devmon.svn.sourceforge.net/devmon/?rev=176&view=rev Author: buchanmilne Date: 2010-03-10 22:28:03 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Close unnecessary sockets Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:24:30 UTC (rev 175) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:28:03 UTC (rev 176) @@ -355,15 +355,21 @@ if($pid = fork) { # Parent code here - close $g{'forks'}{$num}{'PS'}; # dont need to communicate with ourself + do_log("Fork number $num started with pid $pid") if $g{'debug'}; + close $g{'forks'}{$num}{'PS'} or do_log("Closing socket to ourself failed: $!\n"); # dont need to communicate with ourself $g{'forks'}{$num}{'pid'} = $pid; + $g{'forks'}{$num}{'time'} = time; $g{'forks'}{$num}{'CS'}->blocking(0); } elsif(defined $pid) { # Child code here $g{'parent'} = 0; # We arent the parent any more... - close $g{'forks'}{$num}{'CS'}; # Same as above - $0 = 'devmon'; # Remove our 'master' tag + do_log("DEBUG SNMP: Fork $num using sockets $g{'forks'}{$num}{'PS'} <-> $g{'forks'}{$num}{'CS'} for IPC") if $g{'debug'}; + foreach (sort {$a <=> $b} keys %{$g{'forks'}}) { + do_log("DEBUG SNMP: Fork $num closing socket (child $_) $g{'forks'}{$_}{'PS'}") if $g{'debug'}; + $g{'forks'}{$_}{'CS'}->close or do_log("Closing socket for fork $_ failed: $!"); # Same as above + } + $0 = "devmon-$num"; # Remove our 'master' tag fork_sub($num); # Enter our neverending query loop exit; # We should never get here, but just in case } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:39:00
|
Revision: 177 http://devmon.svn.sourceforge.net/devmon/?rev=177&view=rev Author: buchanmilne Date: 2010-03-10 22:38:54 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Make the master "ping" the idle forks to check that IPC works, and kill the forks if there are any errors Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:28:03 UTC (rev 176) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:38:54 UTC (rev 177) @@ -320,6 +320,90 @@ ++$active_forks; $g{'forks'}{$fork}{'time'} = time; } + + # If our fork is idle and has been for more than the cycle time + # make sure it is still alive + if(!defined $g{'forks'}{$fork}{'dev'}) { + my $idletime = time - $g{'forks'}{$fork}{'time'}; + next if ($idletime <= $g{'cycletime'}); + if (defined $g{'forks'}{$fork}{'pinging'}) { + do_log("DEBUG SNMP: Fork $fork was pinged, checking for reply",4) if $g{'debug'}; + my $select = IO::Select->new($g{'forks'}{$fork}{'CS'}); + if($select->can_read(0.01)) { + + do_log("DEBUG SNMP: Fork $fork has data, reading it",4) if $g{'debug'}; + # Okay, we know we have something in the buffer, keep reading + # till we get an EOF + my $data_in = ''; + eval { + local $SIG{ALRM} = sub { die "Timeout waiting for EOF from fork" }; + alarm 5; + do { + my $read = $g{'forks'}{$fork}{'CS'}->getline(); + if(defined $read and $read ne '') {$data_in .= $read} + else {select undef, undef, undef, 0.001} + } until $data_in =~ s/\nEOF\n$//s; + alarm 0; + }; + if($@) { + do_log("Fork $fork, pid $g{'forks'}{$fork}{'pid'} stalled on reply to ping: $@. 
Killing this fork."); + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending $fork TERM signal failed: $!"); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!"); + delete $g{'forks'}{$fork}; + next; + } + do_log("DEBUG SNMP: Fork $fork returned complete message for ping request",4) if $g{'debug'}; + + my $hashref = thaw($data_in); + my %returned; + if (defined $hashref) { + do_log("DEBUG SNMP: Dethawing data for ping of fork $fork",4) if $g{'debug'}; + %returned = %{ thaw($data_in) }; + } + else { + print "failed thaw for ping of fork $fork\n"; + next; + } + if (defined $returned{'pong'}) { + $g{'forks'}{$fork}{'time'} = time; + do_log("Fork $fork responded to ping request $returned{'ping'} with $returned{'pong'} at $g{'forks'}{$fork}{'time'}",4) if $g{'debug'}; + delete $g{'forks'}{$fork}{'pinging'}; + } else { + do_log("Fork $fork didnt send an appropriate response, killing it",4) if $g{'debug'}; + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending $fork TERM signal failed: $!"); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!"); + delete $g{'forks'}{$fork}; + next; + } + + } else { + do_log("DEBUG SNMP: Fork $fork seems not to have replied to our ping, killing it",4); + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending $fork TERM signal failed: $!"); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!"); + delete $g{'forks'}{$fork}; + next; + } + + } else { + my %ping_input = ('ping' => time); + do_log("Fork $fork has been idle for more than cycle time, pinging it at $ping_input{'ping'}",4) if $g{'debug'}; + my $serialized = nfreeze(\%ping_input); + eval { + local $SIG{ALRM} = sub { die "Timeout sending polling task data to fork\n" }; + alarm 15; + $g{'forks'}{$fork}{'CS'}->print("$serialized\nEOF\n"); + alarm 0; + }; + if($@) { + do_log("Fork $g{'forks'}{$fork}, pid $g{'forks'}{$fork}{'pid'} not responding: $@. 
Killing this fork."); + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending TERM signal to fork $fork failed: $!"); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!"); + delete $g{'forks'}{$fork}; + next; + } + $g{'forks'}{$fork}{'pinging'} = 1; + } + } } } } @@ -410,16 +494,40 @@ # Our getline timed out, which means we haven't gotten any data # in a while. Make sure our parent is still there if($@) { - exit 1 if !kill 0, $g{'mypid'}; + do_log("Fork $fork_num timed out waiting for data from parent: $@",4); + if (!kill 0, $g{'mypid'}) { + do_log("Parent is no longer running, fork $fork_num exiting"); + exit 1; + } } $serialized .= $string_in if defined $string_in; } until $serialized =~ s/\nEOF\n$//s; + do_log("DEBUG SNMP($fork_num): Got EOF in message, attempting to thaw",4) if $g{'debug'}; # Now decode our serialized data scalar - my %data_in = %{thaw($serialized)}; + my %data_in; + eval { + %data_in = %{thaw($serialized)}; + }; + if ($@) { + do_log("DEBUG SNMP($fork_num): thaw failed attempting to thaw $serialized: $@",4) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): Replying to corrupt message with a pong",4) if $g{'debug'}; + $data_out{'ping'} = '0'; + $data_out{'pong'} = time; + send_data($sock,\%data_out); + next DEVICE; + } + if (defined $data_in{'ping'}) { + do_log("DEBUG SNMP($fork_num): Received ping from master $data_in{'ping'},replying",4) if $g{'debug'}; + $data_out{'ping'} = $data_in{'ping'}; + $data_out{'pong'} = time; + send_data($sock,\%data_out); + next DEVICE; + } + # Get SNMP variables my $snmp_cid = $data_in{'cid'}; my $snmp_port = $data_in{'port'} || 161; # Default to 161 if not specified This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:43:25
|
Revision: 178 http://devmon.svn.sourceforge.net/devmon/?rev=178&view=rev Author: buchanmilne Date: 2010-03-10 22:43:19 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Add timeouts for all socket communication Make some timeouts dependent on the cycle time Log errors for any socket errors Clean up sockets and forks better Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:38:54 UTC (rev 177) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:43:19 UTC (rev 178) @@ -210,14 +210,28 @@ my $select = IO::Select->new($g{'forks'}{$fork}{'CS'}); if($select->can_read(0.01)) { + do_log("DEBUG SNMP: Fork $fork has data for device $dev, reading it",3) if $g{'debug'}; # Okay, we know we have something in the buffer, keep reading # till we get an EOF my $data_in = ''; - do { - my $read = $g{'forks'}{$fork}{'CS'}->getline(); - if(defined $read and $read ne '') {$data_in .= $read} - else {select undef, undef, undef, 0.001} - } until $data_in =~ s/\nEOF\n$//s; + eval { + local $SIG{ALRM} = sub { die "Timeout waiting for EOF from fork\n" }; + alarm 15; + do { + my $read = $g{'forks'}{$fork}{'CS'}->getline(); + if(defined $read and $read ne '') {$data_in .= $read} + else {select undef, undef, undef, 0.001} + } until $data_in =~ s/\nEOF\n$//s; + alarm 0; + }; + if($@) { + do_log("Fork $g{'forks'}{$fork}, pid $g{'forks'}{$fork}{'pid'} stalled on device $dev: $@. 
Killing this fork.",1); + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending $fork TERM signal failed: $!",2); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",2); + delete $g{'forks'}{$fork}; + next; + } + do_log("DEBUG SNMP: Fork $fork returned complete message for device $dev",3) if $g{'debug'}; # Looks like we got some data my $hashref = thaw($data_in); @@ -285,6 +299,7 @@ elsif (!kill 0, $pid) { # Whoops, looks like our fork died somewhow do_log("Fork $fork ($pid) died polling $dev",0); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",1); delete $g{'forks'}{$fork}; --$active_forks; fork_queries(); @@ -316,7 +331,20 @@ # Now send our input to the fork my $serialized = nfreeze($snmp_input->{$dev}); - $g{'forks'}{$fork}{'CS'}->print("$serialized\nEOF\n"); + eval { + local $SIG{ALRM} = sub { die "Timeout sending polling task data to fork\n" }; + alarm 15; + $g{'forks'}{$fork}{'CS'}->print("$serialized\nEOF\n"); + alarm 0; + }; + if($@) { + do_log("Fork $g{'forks'}{$fork}, pid $g{'forks'}{$fork}{'pid'} not responding: $@. 
Killing this fork.",0); + kill 15, $g{'forks'}{$fork}{'pid'} or do_log("Sending TERM signal to fork $fork failed: $!",0); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing socket to fork $fork failed: $!",1); + delete $g{'forks'}{$fork}; + next; + } + ++$active_forks; $g{'forks'}{$fork}{'time'} = time; } @@ -486,7 +514,7 @@ # messily and leave us hanging around eval { local $SIG{ALRM} = sub { die "Timeout\n" }; - alarm 15; + alarm $g{'cycletime'}; $string_in = $sock->getline(); alarm 0; }; @@ -709,7 +737,11 @@ sub check_forks { for my $fork (keys %{$g{'forks'}}) { my $pid = $g{'forks'}{$fork}{'pid'}; - delete $g{'forks'}{$fork} if !kill 0, $pid; + if (!kill 0, $pid) { + do_log("Fork $fork with pid $pid died, cleaning up",3); + close $g{'forks'}{$fork}{'CS'} or do_log("Closing child socket failed: $!",2); + delete $g{'forks'}{$fork}; + } } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2010-03-10 22:51:14
|
Revision: 179 http://devmon.svn.sourceforge.net/devmon/?rev=179&view=rev Author: buchanmilne Date: 2010-03-10 22:51:08 +0000 (Wed, 10 Mar 2010) Log Message: ----------- Improve timeout handling/logging in-fork Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2010-03-10 22:43:19 UTC (rev 178) +++ trunk/modules/dm_snmp.pm 2010-03-10 22:51:08 UTC (rev 179) @@ -513,8 +513,8 @@ # Wrap our getline in alarm code to make sure our parent doesn't die # messily and leave us hanging around eval { - local $SIG{ALRM} = sub { die "Timeout\n" }; - alarm $g{'cycletime'}; + local $SIG{ALRM} = sub { die "Timeout" }; + alarm $g{'cycletime'} * 2; $string_in = $sock->getline(); alarm 0; }; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2011-04-04 10:32:09
|
Revision: 222 http://devmon.svn.sourceforge.net/devmon/?rev=222&view=rev Author: buchanmilne Date: 2011-04-04 10:32:02 +0000 (Mon, 04 Apr 2011) Log Message: ----------- Try and avoid CPU racing by forks if master is unresponsive Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2011-03-22 07:58:22 UTC (rev 221) +++ trunk/modules/dm_snmp.pm 2011-04-04 10:32:02 UTC (rev 222) @@ -522,11 +522,14 @@ # Our getline timed out, which means we haven't gotten any data # in a while. Make sure our parent is still there if($@) { - do_log("Fork $fork_num timed out waiting for data from parent: $@",4); + do_log("Fork $fork_num timed out waiting for data from parent: $@",3); if (!kill 0, $g{'mypid'}) { do_log("Parent is no longer running, fork $fork_num exiting"); exit 1; } + my $sleeptime = $g{'cycletime'} / 2; + do_log("Parent ($g{'mypid'}) seems to be running, fork $fork_num sleeping for $sleeptime",3); + sleep $sleeptime; } $serialized .= $string_in if defined $string_in; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2011-04-04 10:47:30
|
Revision: 226 http://devmon.svn.sourceforge.net/devmon/?rev=226&view=rev Author: buchanmilne Date: 2011-04-04 10:47:24 +0000 (Mon, 04 Apr 2011) Log Message: ----------- Suppress SNMP warnings if not in debug Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2011-04-04 10:45:05 UTC (rev 225) +++ trunk/modules/dm_snmp.pm 2011-04-04 10:47:24 UTC (rev 226) @@ -39,7 +39,7 @@ my $max_pdu_len = 16384; # default is 8000 # Set some of our global SNMP variables $BER::pretty_print_timeticks = 0; - $SNMP_Session::suppress_warnings = 0; + $SNMP_Session::suppress_warnings = $g{'debug'} ? 0 : 1; # Fiddle with some of our storable settings to correct byte order... $Storable::interwork_56_64bit = 1; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <buc...@us...> - 2012-08-03 10:36:34
|
Revision: 236 http://devmon.svn.sourceforge.net/devmon/?rev=236&view=rev Author: buchanmilne Date: 2012-08-03 10:36:23 +0000 (Fri, 03 Aug 2012) Log Message: ----------- Reduce failed query logging Modified Paths: -------------- trunk/modules/dm_snmp.pm Modified: trunk/modules/dm_snmp.pm =================================================================== --- trunk/modules/dm_snmp.pm 2012-08-03 10:32:02 UTC (rev 235) +++ trunk/modules/dm_snmp.pm 2012-08-03 10:36:23 UTC (rev 236) @@ -718,7 +718,7 @@ $data_out{'maxrep'}{$oid} = $num_reps + 1; } - do_log("DEBUG SNMP($fork_num): Failed queries $failed_query",0) if $g{'debug'}; + do_log("DEBUG SNMP($fork_num): Failed queries $failed_query",0) if ($g{'debug'} and $failed_query gt 0); # We dont want to do every table if we are failing alot of walks if($failed_query > 6) { my $error_str = This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |