[Mon-commit] mon mon,1.4.2.9,1.4.2.10 mon.spec,1.1.2.1,1.1.2.2
Brought to you by:
trockij
From: Jim T. <tr...@us...> - 2004-08-02 19:47:40
|
Update of /cvsroot/mon/mon In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18678 Modified Files: Tag: mon-1-0-0pre1 mon mon.spec Log Message: when allow_empty_group is not set and there are no host arguments to pass to a monitor, the interval wasn't being reset, so it would spam the syslog with lots of "no host arguments" messages. also, in reset_timer, there was a chance that the timer could be set to a negative value when _next_check was already in the past; it now falls back to the interval. seen the bug where lots of mon processes were piling up for some unexplained reason? here's the explanation: -some monitor fails, and it forks a child to call the alert -the child sets up the environment then tries to exec the alert, but the exec fails for one reason or another. the child syslogs the failure, but returns instead of calling exit, so you wind up with another mon process running -it could get quite bad if the unexpected mons register failures and call more alerts, and those alerts fail, etc... this is fixed now, and some better debugging has been added. Index: mon.spec =================================================================== RCS file: /cvsroot/mon/mon/Attic/mon.spec,v retrieving revision 1.1.2.1 retrieving revision 1.1.2.2 diff -C2 -d -r1.1.2.1 -r1.1.2.2 *** mon.spec 12 Jul 2004 12:46:23 -0000 1.1.2.1 --- mon.spec 2 Aug 2004 19:47:31 -0000 1.1.2.2 *************** *** 12,17 **** Name: mon ! Version: 1.0.0pre3 ! Release: 1 Summary: The mon network monitoring system License: GPL --- 12,17 ---- Name: mon ! Version: 1.0.0pre4jt1 ! Release: 2 Summary: The mon network monitoring system License: GPL *************** *** 92,96 **** find %{buildroot} -name "perllocal.pod" -o -name ".packlist" -o -name "*.bs" |xargs -i rm -f {} # build filelist ! echo "%defattr(0664,root,root)" > %filelist find %{buildroot} -type f -printf "/%%P\n" | grep -v "man/man" >> %filelist --- 92,96 ---- find %{buildroot} -name "perllocal.pod" -o -name ".packlist" -o -name "*.bs" |xargs -i rm -f {} # build filelist ! 
echo "%defattr(-,root,root)" > %filelist find %{buildroot} -type f -printf "/%%P\n" | grep -v "man/man" >> %filelist Index: mon =================================================================== RCS file: /cvsroot/mon/mon/mon,v retrieving revision 1.4.2.9 retrieving revision 1.4.2.10 diff -C2 -d -r1.4.2.9 -r1.4.2.10 *** mon 9 Jul 2004 13:27:33 -0000 1.4.2.9 --- mon 2 Aug 2004 19:47:31 -0000 1.4.2.10 *************** *** 446,452 **** syslog ('info', "throttled at $procs processes"); } - } ! else { --- 446,451 ---- syslog ('info', "throttled at $procs processes"); } } ! else { *************** *** 798,802 **** if (!open (CFG, "m4 $CF |")); } ! else { --- 797,801 ---- if (!open (CFG, "m4 $CF |")); } ! else { *************** *** 1110,1114 **** next; } ! if ($inalias) { --- 1109,1113 ---- next; } ! if ($inalias) { *************** *** 1150,1154 **** next; } ! if ($inwatch) { --- 1149,1153 ---- next; } ! if ($inwatch) { *************** *** 1255,1259 **** $args = $2; } ! else { --- 1254,1258 ---- $args = $2; } ! else { *************** *** 1290,1294 **** push @{$pref->{"alerts"}}, $args; } ! elsif ($var eq "upalert") { --- 1289,1293 ---- push @{$pref->{"alerts"}}, $args; } ! elsif ($var eq "upalert") { *************** *** 1296,1305 **** push @{$pref->{"upalerts"}}, $args; } ! elsif ($var eq "startupalert") { push @{$pref->{"startupalerts"}}, $args; } ! elsif ($var eq "alertevery") { --- 1295,1304 ---- push @{$pref->{"upalerts"}}, $args; } ! elsif ($var eq "startupalert") { push @{$pref->{"startupalerts"}}, $args; } ! elsif ($var eq "alertevery") { *************** *** 1392,1396 **** } } ! elsif ($var eq "upalertafter") { --- 1391,1395 ---- } } ! elsif ($var eq "upalertafter") { *************** *** 1402,1406 **** $pref->{"upalertafter"} = $args; } ! elsif ($var eq "numalerts") { --- 1401,1405 ---- $pref->{"upalertafter"} = $args; } ! elsif ($var eq "numalerts") { *************** *** 1472,1476 **** } } ! elsif ($var eq "randskew") { --- 1471,1475 ---- } } ! 
elsif ($var eq "randskew") { *************** *** 1481,1486 **** } - - elsif ($var eq "dep_behavior") { --- 1480,1483 ---- *************** *** 1772,1776 **** configure_filehandle (*TRAPSERVER) || die_die ("err", "could not configure UDP trap port: $!"); ! return if (!$CF{"SNMP"}); --- 1769,1773 ---- configure_filehandle (*TRAPSERVER) || die_die ("err", "could not configure UDP trap port: $!"); ! return if (!$CF{"SNMP"}); *************** *** 2023,2027 **** } sock_write ($fh, "220 test monitor completed\n"); ! # # test alert --- 2020,2024 ---- } sock_write ($fh, "220 test monitor completed\n"); ! # # test alert *************** *** 2070,2074 **** sock_write ($fh, "220 test alert completed\n"); } ! # # test config file --- 2067,2071 ---- sock_write ($fh, "220 test alert completed\n"); } ! # # test config file *************** *** 2419,2423 **** unless (@listAliasesRequest == 0); sock_write ($fh, "220 list aliasgroups completed\n"); ! # # list deps --- 2416,2420 ---- unless (@listAliasesRequest == 0); sock_write ($fh, "220 list aliasgroups completed\n"); ! # # list deps *************** *** 2470,2475 **** sock_write ($fh, "520 unknown list command\n"); } - - # --- 2467,2470 ---- *************** *** 2487,2491 **** my $sref = \%{$watch{$group}->{$service}}; ! if ($sref->{"_op_status"} == $STAT_OK || $sref->{"_op_status"} == $STAT_UNTESTED) { --- 2482,2486 ---- my $sref = \%{$watch{$group}->{$service}}; ! if ($sref->{"_op_status"} == $STAT_OK || $sref->{"_op_status"} == $STAT_UNTESTED) { *************** *** 2627,2631 **** sock_write ($fh, "220 command authorized\n"); } ! else { --- 2622,2626 ---- sock_write ($fh, "220 command authorized\n"); } ! else { *************** *** 3177,3202 **** } ! if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"})) { syslog ('err', "monitor for $group/$service" . " not called because of no host arguments\n"); ! } else { $fhandles{"$group/$service"} = new FileHandle; ! $pid = open($fhandles{"$group/$service"}, '-|'); ! 
if (!defined $pid) { syslog ('err', "Could not fork: $!"); delete $fhandles{"$group/$service"}; return 0; ! } elsif ($pid == 0) { open(STDERR, '>&STDOUT') or syslog ('err', "Could not dup stderr: $!"); open(STDIN, "</dev/null") or syslog ('err', "Could not connect stdin to /dev/null: $!"); my $v; ! foreach $v (keys %{$sref->{"ENV"}}) { $ENV{$v} = $sref->{"ENV"}->{$v}; } $ENV{"MON_LAST_SUMMARY"} = $sref->{"_last_summary"}; $ENV{"MON_LAST_OUTPUT"} = $sref->{"_last_output"}; --- 3172,3210 ---- } ! if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"})) ! { syslog ('err', "monitor for $group/$service" . " not called because of no host arguments\n"); + reset_timer ($group, $service); + } ! else ! { $fhandles{"$group/$service"} = new FileHandle; ! $pid = open ($fhandles{"$group/$service"}, '-|'); ! ! if (!defined $pid) ! { syslog ('err', "Could not fork: $!"); delete $fhandles{"$group/$service"}; return 0; + } ! elsif ($pid == 0) ! { open(STDERR, '>&STDOUT') or syslog ('err', "Could not dup stderr: $!"); + open(STDIN, "</dev/null") or syslog ('err', "Could not connect stdin to /dev/null: $!"); + my $v; ! ! foreach $v (keys %{$sref->{"ENV"}}) ! { $ENV{$v} = $sref->{"ENV"}->{$v}; } + $ENV{"MON_LAST_SUMMARY"} = $sref->{"_last_summary"}; $ENV{"MON_LAST_OUTPUT"} = $sref->{"_last_output"}; *************** *** 3208,3213 **** $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"}; $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"}; ! exec @args or syslog ('err', "could not exec '@args': $!") ! && exit(1); } --- 3216,3225 ---- $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"}; $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"}; ! ! if (!exec @args) ! { ! syslog ('err', "could not exec '@args': $!"); ! exit (1); ! } } *************** *** 3263,3272 **** int(rand($sref->{"randskew"})+1)); } ! elsif ($sref->{"_next_check"}) { ! $sref->{"_timer"} = $sref->{"_next_check"} - time(); } ! else { --- 3275,3287 ---- int(rand($sref->{"randskew"})+1)); } ! elsif ($sref->{"_next_check"}) { ! 
if (($sref->{"_timer"} = $sref->{"_next_check"} - time()) < 0) ! { ! $sref->{"_timer"} = $sref->{"interval"}; ! } } ! else { *************** *** 3503,3507 **** return undef if (!defined $pass); ! if ((crypt ($plaintext, $pass)) ne $pass) { return undef; --- 3518,3522 ---- return undef if (!defined $pass); ! if ((crypt ($plaintext, $pass)) ne $pass) { return undef; *************** *** 3820,3824 **** $trap{$trap_name} = un_esc_str ($trap_val); } ! else { --- 3835,3839 ---- $trap{$trap_name} = un_esc_str ($trap_val); } ! else { *************** *** 3841,3845 **** $traphost = "*"; } ! else { --- 3856,3860 ---- $traphost = "*"; } ! else { *************** *** 3915,3919 **** return; } ! elsif (!defined $watch{$trap{"grp"}}->{$trap{"svc"}}) { --- 3930,3934 ---- return; } ! elsif (!defined $watch{$trap{"grp"}}->{$trap{"svc"}}) { *************** *** 4039,4043 **** } next; ! # # SNMP trap --- 4054,4058 ---- } next; ! # # SNMP trap *************** *** 4194,4198 **** # sub normalize_paths { ! my ($authtype, @authtypes); --- 4209,4213 ---- # sub normalize_paths { ! my ($authtype, @authtypes); *************** *** 4365,4369 **** ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART, $STAT_LINKDOWN, $STAT_UNKNOWN, $STAT_TIMEOUT, $STAT_UNTESTED, $STAT_DEPEND, $STAT_WARN) = (0..9); ! %FAILURE = ( $STAT_FAIL => 1, --- 4380,4384 ---- ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART, $STAT_LINKDOWN, $STAT_UNKNOWN, $STAT_TIMEOUT, $STAT_UNTESTED, $STAT_DEPEND, $STAT_WARN) = (0..9); ! %FAILURE = ( $STAT_FAIL => 1, *************** *** 4426,4433 **** $pref->{"_last_alert"} = 0 if ($pref->{"alertevery"}); ! $pref->{"_consec_failures"} = 0 if ($pref->{"alertafter_consec"}); ! $pref->{'_1stfailtime'} = 0 if ($pref->{"alertafterival"}); --- 4441,4448 ---- $pref->{"_last_alert"} = 0 if ($pref->{"alertevery"}); ! $pref->{"_consec_failures"} = 0 if ($pref->{"alertafter_consec"}); ! 
$pref->{'_1stfailtime'} = 0 if ($pref->{"alertafterival"}); *************** *** 4606,4612 **** if (!exec @execargs) { ! syslog ('err', "could not exec alert $alert: $!"); ! return undef; } exit; } --- 4621,4628 ---- if (!exec @execargs) { ! syslog ('err', "child could not exec alert $alert (execargs=" . join (",", @execargs) . "): $!"); ! exit (1); } + exit; } *************** *** 4624,4627 **** --- 4640,4653 ---- return (1) if ($args{"flags"} & $FL_TEST); + my $exitval = $? >> 8; + + if ($exitval) + { + syslog ("err", "child alert for " . + " $args{group}/$args{service} " . + "failed, exited with $exitval"); + return undef; + } + # # tally this alert *************** *** 4871,4875 **** my $msg = shift; my $ans = ""; ! $ans = $PAM_username if ($code == Authen::PAM::PAM_PROMPT_ECHO_ON() ); $ans = $PAM_password if ($code == Authen::PAM::PAM_PROMPT_ECHO_OFF() ); --- 4897,4901 ---- my $msg = shift; my $ans = ""; ! $ans = $PAM_username if ($code == Authen::PAM::PAM_PROMPT_ECHO_ON() ); $ans = $PAM_password if ($code == Authen::PAM::PAM_PROMPT_ECHO_OFF() ); |