[Mon-commit] mon/mon.d ntpdate.monitor,NONE,1.1.2.1
Brought to you by:
trockij
From: Jim T. <tr...@us...> - 2004-06-23 13:25:40
|
Update of /cvsroot/mon/mon/mon.d In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30450 Added Files: Tag: mon-1-0-0pre1 ntpdate.monitor Log Message: from meekj with a couple changes from trockij to make it more verbose for non-failures --- NEW FILE: ntpdate.monitor --- #!/usr/bin/perl # # ntpdate.monitor Verify that NTP is running and times are within tolerance # ntpdate will do most of the work for us # =head1 NAME B<ntpdate.monitor> - ntp monitor using ntpdate to do most of the work =head1 DESCRIPTION A mon monitor to verify that ntp is running on multiple servers, those servers have synchronized time, and that the times are within specified limits. The mon server should be running ntp since the times are reported relative to the system performing the query. =head1 SYNOPSIS B<ntpdate.monitor -d -l log_file_YYYYMM.log --maxstratum nn --maxoffset n.nn> =head1 OPTIONS =over 5 =item B<--maxstratum> Maximum stratum number, default is 10. Stratum 16 indicates that ntp is running on a system, but the clock is not synchronized. An alarm will be triggered if this value is exceeded. =item B<--maxoffset> Maximum value of the clock offset in seconds, default is 0.8 s (a large value, ntp typically keeps clocks within milliseconds of each other). An alarm will be triggered if this value is exceeded. =item B<-l log_file_template> or B<--log log_file_template> /path/to/logs/internet_web_YYYYMM.log Current year & month are substituted for YYYYMM; that is the only possible template at this time. The format of the log file is one line per server reported by ntpdate, with space-separated fields: query time (Unix epoch seconds), host name (or IP address if no name is known), stratum, offset, and delay. =item B<-d> Debug/Test, for manual testing only. =back =head1 MON CONFIGURATION EXAMPLE hostgroup ntp ntp1.somedomain.org ntp2.somedomain.org ntp3.somedomain.org watch ntp service ntpdate interval 30m monitor ntpdate.monitor --maxoffset 0.1 --log /usr/local/mon/logs/gv-ntp-YYYYMM.log period wd {Sun-Sat} alert mail.alert us...@so... alertevery 1h summary =head1 BUGS The location of ntpdate is hardcoded to be /usr/sbin/ntpdate. 
This works for Solaris 8 and RedHat Linux 7.x, at least, but it should be configurable. Check the first line of this file to be sure that it points to an appropriate perl executable. =head1 AUTHOR Jon Meek, me...@ie... =head1 SEE ALSO ntp.monitor by Daniel Hagerty <ha...@li...> =cut $RCSid = q{$Id: ntpdate.monitor,v 1.1.2.1 2004/06/23 13:25:32 trockij Exp $ }; # # Jon Meek # Lawrenceville, NJ # me...@ie... # # # $Id: ntpdate.monitor,v 1.1.2.1 2004/06/23 13:25:32 trockij Exp $ # # Copyright (C) 2002, Jon Meek # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;
use Net::DNS;

#
# Command-line options
#
my ($MaxStratum, $MaxOffset, $opt_d, $opt_l);

GetOptions(
    "maxstratum=i" => \$MaxStratum,
    "maxoffset=f"  => \$MaxOffset,
    # "dns" => \$UseDNS,
    "d"     => \$opt_d,
    "l=s"   => \$opt_l,
    "log=s" => \$opt_l,
);

#
# Set Defaults
#
# ntpdate reports stratum 16 if ntp is running, but time is not synchronized
# stratum 0 will be reported if ntp is not running
#
# Use defined() so that an explicit value of 0 on the command line
# (e.g. --maxoffset 0) is honoured instead of being replaced by the default.
#
$MaxStratum = 10 unless defined $MaxStratum;
my $MinStratum = 1;

#
# Trigger alarm if the time is ever off by this much
#
$MaxOffset = 0.800 unless defined $MaxOffset;    # Seconds

my $NTPDATE = '/usr/sbin/ntpdate';

my @Hosts = @ARGV;    # Host names are left on the command line after Getopt

# Nothing to check: never report "all OK" for an empty host list.
unless (@Hosts) {
    print "no hosts given\n";
    exit 1;
}

my %NameByIP = DNSlookups(\@Hosts);

my $TimeOfDay = time;    # Current time
print "TimeOfDay: $TimeOfDay\n" if $opt_d;

#
# Run ntpdate in query mode.  The list form of a piped open bypasses the
# shell, so host names taken from the command line cannot be interpreted
# as shell syntax, and a failure to exec ntpdate is reported by open().
#
my @cmd = ($NTPDATE, '-q', @Hosts);
open(my $ntp, '-|', @cmd) or die "Couldn't run @cmd: $!\n";

my %LogString;        # ip => line destined for the log file
my %FailureDetail;    # ip => raw ntpdate line for servers outside the limits
my %SuccessDetail;    # ip => formatted line for servers within the limits
my $ServerLines = 0;  # number of "server ..." lines ntpdate produced

while (my $in = <$ntp>) {
    chomp $in;

    #
    # Pick out server strings
    #
    next unless $in =~ /^server\s+([\d\.]+),\s+stratum\s+(\d+),\s+offset\s+([\d\.\-\+]+),\s+delay\s+([\d\.\-\+]+)/;
    my ($ip, $stratum, $offset, $delay) = ($1, $2, $3, $4);
    $ServerLines++;

    # ntpdate reports IP addresses; fall back to the IP when no name is known.
    my $name = exists $NameByIP{$ip} ? $NameByIP{$ip} : $ip;
    print "$in $name\n" if $opt_d;

    #
    # Prepare log entries
    #
    if ($opt_l or $opt_d) {
        $LogString{$ip} = qq{$TimeOfDay $name $stratum $offset $delay};
    }

    #
    # Check alarm limits
    #
    if (($stratum > $MaxStratum) || ($stratum < $MinStratum)
        || (abs($offset) > $MaxOffset)) {
        $FailureDetail{$ip} = $in;
        print "Fail: $ip $stratum $offset $delay $name\n" if $opt_d;
    }
    else {
        $SuccessDetail{$ip} = sprintf ("%-20s %-7d %-2.5f %-2.5f",
                                       $name, $stratum, $offset, $delay);
    }

    # Ignore the final line for now, probably not needed at all
    #
    # if ($in =~ / adjust time server\s+([\d\.]+)\s+offset\s+([\d\.\-\+]+)\s+/) {
    #   $ip = $1;
    #   $offset = $2;
    #   if (abs($offset) > $MaxOffset) {
    #     $FailureDetail{$ip} = $in;
    #     print "Fail: $ip $offset\n" if $opt_d;
    #   }
    # }
}

# A non-zero exit status alone is not treated as a failure; it is only
# used for diagnostics below.
close($ntp);
my $NtpdateStatus = $? >> 8;
print "ntpdate exit status: $NtpdateStatus\n" if $opt_d;

#
# Write results to logfile, if -l
#
if ($opt_l) {
    my $LogFile = $opt_l;
    my ($Month, $Year) = (localtime($TimeOfDay))[4, 5];
    my $YYYYMM = sprintf('%04d%02d', $Year + 1900, $Month + 1);
    $LogFile =~ s/YYYYMM/$YYYYMM/;    # Fill in current year and month

    # Logging is best-effort: warn and carry on if the file is unusable,
    # but never print to a handle that failed to open.
    if (open(my $log, '>>', $LogFile)) {
        foreach my $ip (sort keys %LogString) {
            print {$log} "$LogString{$ip}\n";
        }
        close($log) or warn "$0 Can't close logfile: $LogFile: $!\n";
    }
    else {
        warn "$0 Can't open logfile: $LogFile: $!\n";
    }
}

if ($opt_d) {
    foreach my $ip (sort keys %LogString) {
        print "LOG: $LogString{$ip}\n";
    }
}

#
# Collect the names of failed servers for the summary line
#
my @FailuresIP   = sort keys %FailureDetail;
my @FailuresName = map { exists $NameByIP{$_} ? $NameByIP{$_} : $_ } @FailuresIP;

# If ntpdate produced no server lines at all, nothing was actually checked;
# report every requested host as failed rather than a silent "all OK".
my $NoResults = ($ServerLines == 0);
@FailuresName = @Hosts if $NoResults;

# First line of output: the (possibly empty) sorted list of failed hosts.
my @SortedFailures = sort @FailuresName;
print "@SortedFailures\n";

if ($NoResults) {
    print "servers which have a failure:\n\n";
    print "ntpdate returned no server results (exit status $NtpdateStatus)\n";
    print "\n\n";
}
elsif (@FailuresIP) {
    print "servers which have a failure:\n\n";
    foreach my $ip (@FailuresIP) {
        my $name = exists $NameByIP{$ip} ? $NameByIP{$ip} : $ip;
        print "$name $ip $FailureDetail{$ip}\n";
    }
    print "\n\n";
}

#
# show details for non-failed hosts
#
print "servers which have no failures:\n\n";
printf ("%-20s %-2s %-8s %-8s\n", "server", "stratum", "offset", "delay");
print "-" x 50 . "\n";
foreach my $k (sort keys %SuccessDetail) {
    print "$SuccessDetail{$k}\n";
}

if (@FailuresName == 0) {    # Indicate "all OK" to mon
    exit 0;
}

exit 1;    # Indicate failure to mon

#
# Get the IP addresses for the hosts (because ntpdate returns IP addresses)
#
# Takes a reference to an array of host names and returns a hash (as a
# list) mapping each address found in an "A" record to the host name that
# was looked up.  Hosts that cannot be resolved contribute no entries.
#
sub DNSlookups {
    my ($Hosts) = @_;

    my %name_by_ip;
    my $res = Net::DNS::Resolver->new;

    foreach my $target (@$Hosts) {
        my $query = $res->search($target) or next;
        foreach my $rr ($query->answer) {
            #print "$target Type: ", $rr->type, "\n" if $opt_d;
            next unless $rr->type eq "A";
            print $rr->address . ' ' if $opt_d;
            $name_by_ip{$rr->address} = $target;
        }
    }

    return %name_by_ip;
}