[Mon-commit] mon/mon.d snmpdiskspace.monitor,NONE,1.1.2.1
Brought to you by:
trockij
|
From: Jim T. <tr...@us...> - 2007-05-02 23:25:12
|
Update of /cvsroot/mon/mon/mon.d
In directory sc8-pr-cvs16:/tmp/cvs-serv5137/mon.d
Added Files:
Tag: mon-1-2-branch
snmpdiskspace.monitor
Log Message:
added snmpdiskspace.monitor with eravin's patches
--- NEW FILE: snmpdiskspace.monitor ---
#!/usr/local/bin/perl
#
# NAME
# snmpdiskspace.monitor
#
#
# SYNOPSIS
# snmpdiskspace.monitor [--list] [--listall] [--timeout seconds]
# [--config filename] [--community string] [--free minfree]
# [--ifree minifree] [--retries retries] [--usemib <mibtype>]
# [--debug] host...
#
#
# DESCRIPTION
# This script uses the Host Resources MIB (RFC1514), and optionally
# the MS Windows NT Performance MIB, or UCD-SNMP extensions
# (enterprises.ucdavis.dskTable.dskEntry) to monitor diskspace on hosts
# via SNMP.
#
# snmpdiskspace.monitor uses a config file to allow the specification of
# minimum free space on a per-host and per-partition basis. The config
# file allows the use of regular expressions, so it is quite flexible in
# what it can allow. See the sample config file for more details and
# syntax.
#
# The script only checks disks marked as "FixedDisks" by the Host MIB,
# which should help cut down on the number of CD-ROM drives
# erroneously reported as being full! Since the drive classification
# portion of the UCD Host MIB isn't too great on many OS'es, though,
# this won't buy you a lot. Empire's SNMP agent gets this right on
# all the hosts that I checked, though. Not sure about the MS MIB.
# UCD-SNMP only checks specific partition types (md, hd, sd, ida)
#
# snmpdiskspace.monitor is intended for use as a monitor for the mon
# network monitoring package.
#
#
# OPTIONS
# --community The SNMP community string to use. Default is "public".
# --config The config file to use. Default is either
# /etc/mon/snmpdiskspace.cf or
# /usr/lib/mon/mon.d/snmpdiskspace.cf, in that order.
# --retries The number of retries to use, if we get an SNMP timeout.
# Default is retry 5 times.
# --timeout Seconds to wait before declaring a timeout on an SNMP get.
# Default is 2 seconds (2000000 microseconds passed to the
# SNMP session).
# --free The default minimum free space, in a percentage or absolute
# quantity, as per the config file. Thus, arguments of, for
# example, "20%", "1gb", "50mb" are all valid.
# Default is 5% free on every partition checked.
#
# --ifree The default minimum free inode percentage, specified as
# a percentage. Default is 5% free.
#
# --list Give a verbose listing of all partitions checked on all
# specified hosts.
#
# --listall like --list, but also lists the thresholds defined for
# each filesystem, so you can doublecheck the config file
#
# --usemib Choose which MIB to use: one or more of host, perf, ucd
# Default tries all three, in that order
#
# --debug enable debug output for config file parsing and MIB fetching
#
#
# EXIT STATUS
# Exit status is as follows:
# 0 No problems detected.
# 1 Free space on any host was below the supplied parameter.
# 2 A "soft" error occurred, either a SNMP library error,
# or could not get a response from the server.
#
# In the case where both a soft error and a freespace violation are
# detected, exit status is 1.
#
# BUGS
# When using the net-snmp agent, you must build it with "--with-dummy-values"
# or the monitor may not parse the Host Resources MIB properly.
#
# List of local filesystem types used when parsing the UCD MIB should be
# configurable.
#
#
# NOTES
# $Id: snmpdiskspace.monitor,v 1.1.2.1 2007/05/02 23:25:07 trockij Exp $
#
# * Added support for inode status via UCD-SNMP MIB. Fourth column in config
# file (optional) is for inode%.
# * added --debug and --usemib options. Latter needed so you can force use
# of UCD mib if you want inode status.
# * rearranged the error messages to be more Mon-like (hostname first)
# * added code to synchronize instance numbers when using UCD MIB. This
# could solve the "sparse MIB" problem usually fixed by the
# --with-dummy-values option in net-snmp if needed for other agents
# Ed Ravin (er...@pa...), January 2005
#
# Added support for regex hostnames and partition names in the config file,
# 'use strict' by andrew ryan <an...@na...>.
#
# Generalised to handle multiple MIBs by jens persson <jen...@bt...>
# Changes Copyright (C) 2000, jens persson
#
# Modified for use with UCD-SNMP by Johannes Walch for
# NWE GmbH (j....@nw...)
#
# Support for UCD's disk MIB added by Matt Simonsen <ma...@ca...>
#
#
# SEE ALSO
# mon: http://www.kernel.org/software/mon/
#
# This requires the UCD SNMP library and G.S. Marzot's Perl SNMP
# module. (http://ucd-snmp.ucdavis.edu and CPAN, respectively).
#
# The Empire SystemEdge SNMP agent: http://www.empire.com
#
#
# COPYRIGHT
#
# Copyright (C) 1998, Jim Trocki
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
use strict;
use SNMP;
use Getopt::Long;
sub readcf;
sub toBytes;
sub get_values;
#
# Main program.
#
# Parses the command line, loads the optional config file, then for every
# host named on the command line fetches its disk list via get_values() and
# compares each filesystem against the first matching config line (or the
# command-line / built-in defaults).  Failures are collected in @HOSTS
# (summary line) and @ERRS (detail lines) and printed at the end; the exit
# status is $RET (0 = OK, 1 = threshold violated, 2 = SNMP "soft" error).
#

# setup what mibs to use
# $ENV{"MIBS"} = 'RFC1213-MIB:HOST-RESOURCES-MIB:WINDOWS-NT-PERFORMANCE:UCD-SNMP-MIB';
$ENV{"MIBS"} = 'RFC1213-MIB:HOST-RESOURCES-MIB:UCD-SNMP-MIB';

my %opt;

# parse the commandline
# --free is read as a string so that the documented forms ("20%", "1gb",
# "50mb") are accepted; it is converted with toBytes() below.
GetOptions (\%opt, "community=s", "timeout=i", "retries=i", "config=s", "list", "listall", "free=s", "ifree=n", "usemib=s", "debug");

die "No host arguments given!\n" if (@ARGV == 0);

my $RET = 0;        # exit value of script
my @ERRS = ();      # array holding detail output
my @HOSTS = ();     # array holding summary output
my @cfgfile = ();   # array holding contents of config file

# Read in defaults
my $COMM = $opt{"community"} || $ENV{"COMMUNITY"} || "public";

# SNMP::Session expects its Timeout in microseconds, so the --timeout value
# (seconds) is scaled by 1,000,000.  Without --timeout the value stays at
# 2000000 microseconds (2 seconds), which is what this script has always
# passed to the session by default.
my $TIMEOUT = ($opt{"timeout"} || 0) * 1000000 || 2000000;

my $RETRIES = $opt{"retries"} || 5;
my $CONFIG = $opt{"config"} || (-d "/etc/mon" ? "/etc/mon" : "/usr/lib/mon/mon.d")
    . "/snmpdiskspace.cf";

# Default free-space threshold.  Negative values are percentages, positive
# values are absolute byte counts (same convention as toBytes()).
my $DISKFREE = -5;  # default max % full is 95%
if (defined $opt{"free"} && $opt{"free"} ne "") {
    if ($opt{"free"} =~ /^-\d+$/) {
        # legacy form: a bare negative integer means "that many percent"
        $DISKFREE = int ($opt{"free"});
    } else {
        my $parsed = toBytes ($opt{"free"});
        die "Invalid --free value '$opt{free}' (expected e.g. 20%, 50mb, 1gb)\n"
            unless defined ($parsed);
        # as before, a value of 0 falls back to the 5% default
        $DISKFREE = $parsed || -5;
    }
}

my $INODEFREE = $opt{"ifree"} || 5;  # default max % inode full is 95%
my $USEMIB = $opt{"usemib"} || "host perf ucd";
my $LIST = $opt{"list"} || $opt{"listall"} || 0;
my $LISTALL = $opt{"listall"} || 0;
my $DEBUG = $opt{"debug"} || 0;

# NOTE: $host and $disk must stay file-scoped and be used directly as the
# loop variables below: the STDOUT format at the end of the file reads them
# when "write" is called.
my ($host, $checkval, $icheckval, %FREE, $disk, @disklist, $cfgline);

# read the config file
if ( !readcf ($CONFIG) ) {
    # not being able to read config file shouldn't be a fatal, since we
    # have defaults we can use.
    print STDERR "readcf: Could not read config file $CONFIG: $!\n";
}

# now do the checks for each host
foreach $host (@ARGV) {

    # fetch the info from the computers
    @disklist = get_values($host);

    # make sure we got an OK return value from get_values before going
    # any further (on failure it has already recorded the error)
    next unless (@disklist) && (ref($disklist[0]) eq "ARRAY");

    # Now check each partition.  Each entry is
    # [ free bytes, percent used, description, inode percent used or "N/A" ]
    foreach $disk (@disklist) {
        undef $checkval ;
        undef $icheckval ;

        # Go through the config file line by line until we
        # find a match for this host/partition. Stop as soon
        # as we find a match.  The host and filesystem columns of the
        # config file are used as (anchored) regular expressions.
        foreach $cfgline (@cfgfile) {
            if ( ($host =~ m/^$cfgline->[0]$/) &&
                 ($disk->[2] =~ m/^$cfgline->[1]$/)
               ) {
                print STDERR "'$host' matched /^$cfgline->[0]\$/ and '$disk->[2]' matched /^$cfgline->[1]\$/, using checkval $cfgline->[2]\n" if $DEBUG;
                $checkval = $cfgline->[2] ;
                $icheckval= $cfgline->[3] ;
                last;
            }
        }

        # Set to default otherwise
        $checkval = $DISKFREE unless defined($checkval);
        $icheckval= $INODEFREE unless defined($icheckval);
        $icheckval=~ s/%$//;

        # do the checking, first absolute and then percentage
        next if $checkval == 0 && $icheckval == 0; # nothing to check: ignore

        my $hostfailed= 0;
        if (($checkval > 0) && ($disk->[0] <$checkval)) {
            # absolute threshold: fewer free bytes than required
            $hostfailed++;
            push (@ERRS,sprintf("%s: filesystem %s is (%1.1f%% full), %1.0fMB free (below threshold %1.0fMB free)",
                $host , $disk->[2] , $disk->[1] , $disk->[0] / 1048576, $checkval / 1048576 ));
        } elsif (($checkval < 0) && ($disk->[1] - $checkval >=100)) {
            # percentage threshold: $checkval is -(min % free), so this is
            # "percent used + min percent free >= 100"
            $hostfailed++;
            push (@ERRS,sprintf("%s: filesystem %s is (%1.1f%% full), %1.0fMB free (below threshold %s%% free)",
                $host , $disk->[2] , $disk->[1] , $disk->[0] / 1048576, abs($checkval) ));
        }

        if (($icheckval > 0) && ($disk->[3] ne "N/A") && (100 - $disk->[3]) < $icheckval ) {
            $hostfailed++;
            push (@ERRS, sprintf ("%s: filesystem %s has %1.1f%% inodes free (below threshold %s%% inodes free)",
                $host, $disk->[2], 100 - $disk->[3], $icheckval ));
        }

        if ($hostfailed)
        {
            push (@HOSTS, $host);
            $RET = 1;
        }

        # if the user want a listing, then the user will get a listing :-)
        write if ($LIST or $LISTALL);
        if ($LISTALL) {
            printf(" Will alarm if MB free declines below threshold %1.0fMB free\n", $checkval / 1048576) if $checkval > 0;
            printf(" Will alarm if %%free space declines below threshold %1.1f%% free\n", abs($checkval)) if $checkval < 0;
            printf(" No free space alarm defined in config file.\n") if $checkval == 0;
            printf(" Will alarm if %%free inodes declines below %1.1f%%\n", $icheckval) if $icheckval > 0;
            printf(" No %%inodes free alarm defined in config file.\n") if $icheckval == 0;
            printf(" WARNING: Unable to alarm on inodes free, dskPercentNode not found in MIB\n") if $disk->[3] eq "N/A" and $icheckval > 0;
        }
    }
}

if ($LIST or $LISTALL) {
    print "\n\n";
}

# Uniq the array of failures, so multiple failures on a single host
# are reported in the details section (lines #2-infinity) but not
# in the summary (line #1).  The first-seen order is preserved so the
# summary line is stable from run to run.
# Then print out the failures, if any.
my %saw;
@HOSTS = grep { !$saw{$_}++ } @HOSTS;

if ($RET) {
    print "@HOSTS\n";
    print "\n";
    print join("\n", @ERRS), "\n";
}

exit $RET;
#
# read configuration file
#
# readcf ($f)
#
# Reads config file $f and appends one entry per non-blank, non-comment
# line to the file-level @cfgfile array.  Each line is split on whitespace
# into: host regex, filesystem regex, free-space threshold, and an optional
# inode threshold.  The stored entry is
#   [ host, filesys, toBytes(free), ifree (or 0 when absent) ]
#
# Returns undef if the file cannot be opened ($! is left set for the
# caller), otherwise the result of closing the file.  Dies if a line's
# free-space threshold cannot be parsed by toBytes().
sub readcf {
    my ($f) = @_;

    # three-argument open with a lexical handle, so that odd characters in
    # the file name can never be interpreted as an open mode or a pipe
    open (my $cf, '<', $f) || return undef;

    while (my $line = <$cf>) {
        next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
        chomp $line;

        my ($host, $filesys, $free, $ifree) = split (' ', $line);

        # Validate the threshold explicitly.  (Testing the return value of
        # push can never detect a bad value, because push always returns
        # the new, non-zero length of the array.)
        my $threshold = toBytes ($free);
        if (!defined ($threshold)) {
            die "error free specification, config $f, line $.\n";
        }

        push (@cfgfile, [$host , $filesys , $threshold, $ifree || 0]);
        print STDERR "cf: assigned host=$host, filesys=$filesys, free=$free, ifree=$ifree\n" if $DEBUG;
    }

    return close ($cf);
}
# toBytes ($free)
#
# Parses a threshold specification: a non-negative integer or decimal
# number immediately followed by an optional, case-insensitive unit
# (no whitespace in between).
#
#   N      returns int(N)               (bytes)
#   Nkb    returns int(N * 1024)
#   Nmb    returns int(N * 1024^2)
#   Ngb    returns int(N * 1024^3)
#   N%     returns -N                   (negative means "percent")
#
# Percentages are NOT truncated to an integer: "0.5%" must yield -0.5,
# because a result of 0 is treated by the main loop as "no threshold".
#
# Returns undef when the string cannot be parsed (or is undefined).
sub toBytes {
    my ($free) = @_;

    # Explicit "return undef" rather than a bare return: this function may
    # be called in list context (e.g. inside an array constructor), where a
    # bare return would yield an empty list and shift the following
    # elements.
    return undef unless defined ($free);
    return undef unless ($free =~ /^(\d+(?:\.\d+)?)(kb|mb|gb|%|)$/i);

    my ($n, $u) = ($1, lc ($2));

    return (0 - $n) if ($u eq "%");
    return (int ($n * 1024 )) if ($u eq "kb");
    return (int ($n * 1024 * 1024)) if ($u eq "mb");
    return (int ($n * 1024 * 1024 * 1024)) if ($u eq "gb");
    return int ($n);
}
#
# Do the work of trying to get the data from the host via SNMP
#
# get_values ($host)
#
# Opens an SNMP session to $host (using the file-level $TIMEOUT, $COMM and
# $RETRIES settings) and walks, in this order and as enabled by $USEMIB,
# the Host Resources MIB, the NT performance MIB and the UCD dskTable.
# The first MIB that yields at least one filesystem wins.
#
# Returns a list of array refs, one per filesystem:
#   [ free space in bytes, percent value, description, inode percent ]
# where the inode percent is "N/A" unless the UCD MIB supplied it.
#
# On failure (session could not be created, SNMP error, or no disk OIDs
# found) the host is pushed onto @HOSTS, a message onto @ERRS, $RET is set
# to 2 unless it is already 1, and undef is returned.
sub get_values {
    my ($host) = @_;
    my (@disklist,$Type,$Descr,$AllocationUnits,$Size,$Used,$Freespace,$Percent,$InodePercent);
    my ($v,$s);

    $s = SNMP::Session->new (DestHost => $host,
        Timeout => $TIMEOUT, Community => $COMM,
        Retries => $RETRIES);
    if (!defined($s)) {
        $RET = ($RET == 1) ? 1 : 2 ;
        push (@HOSTS, $host);
        # $s is undefined here, so there is no session object to ask for
        # the error text; the SNMP module reports session-creation errors
        # in the package variable $SNMP::Session::ErrorStr instead.
        push (@ERRS, "$host: could not create session: " . ($SNMP::Session::ErrorStr || "unknown error"));
        return undef;
    }

    # First we try to use the Host mib (RFC1514)
    # supported by net-snmpd on most platforms, see http://www.net-snmp.org
    #
    # You can also use the Empire (http://www.empire.com)
    # SNMP agent to provide hostmib support on UNIX and NT.
    if ($USEMIB =~ /host/i)
    {
        $v = SNMP::VarList->new (
            ['hrStorageIndex'],
            ['hrStorageType'],
            ['hrStorageDescr'],
            ['hrStorageAllocationUnits'],
            ['hrStorageSize'],
            ['hrStorageUsed'],
        );

        while (defined $s->getnext($v)) {
            # stop once the walk has left the hrStorage table
            last if ($v->[0]->tag !~ /hrStorageIndex/);

            $Type = $v->[1]->val;
            $Descr = $v->[2]->val;
            $AllocationUnits = $v->[3]->val;
            $Size = $v->[4]->val;
            $Used = $v->[5]->val;
            $Freespace = (($Size - $Used) * $AllocationUnits);
            print STDERR "Found HOST MIB filesystem: Type=$Type, Descr=$Descr, AllocationUnits=$AllocationUnits, Size=$Size, Used=$Used\n" if $DEBUG;

            # This next check makes sure we're only looking at storage
            # devices of the "FixedDevice" type (4). For comparison, Physical
            # RAM is 2, Virtual Memory is 3, Floppy Disk is 6, and CD-ROM is 7
            # Using the Empire agent, this will eliminate drive types other
            # than hard disks. The UCD agent is not as good as determining
            # drive types under the HOST mib.
            next if ($Type !~ /\.1\.3\.6\.1\.2\.1\.25\.2\.1\.4/);

            # percent used; guard against zero-sized entries
            if ($Size != 0) {
                $Percent= ($Used / $Size) * 100.0;
            } else {
                $Percent=0;
            };

            push (@disklist,[$Freespace,$Percent,$Descr, "N/A"]);
            print STDERR "Using HOST MIB filesystem: $Descr ($Type)\n" if $DEBUG;
        };

        if (@disklist) {
            return @disklist;
        };
    };

    # Then we test the perfmib from M$ NT resource kit
    # I'm using the agent/mib-defs from
    # http://www.wtcs.org/snmp4tpc/
    # for some reason every second request fails,
    # so we fetch the variables twice and discard
    # the bad ones
    #
    # NOTE(review): the first "last" below compares the *value* of
    # ldiskPercentFreeSpace with the hrStorageFixedDisk OID, which looks
    # like a leftover from the HOST MIB branch and would end the walk at
    # once -- confirm against a real agent before relying on this branch.
    # NOTE(review): judging by its name, ldiskPercentFreeSpace is a percent
    # *free*, while callers treat element [1] as percent *used* -- confirm.
    if ($USEMIB =~ /perf/i)
    {
        $v = SNMP::VarList->new (
            ['ldisklogicalDiskIndex'],
            ['ldiskPercentFreeSpace'],
            ['ldiskPercentFreeSpace'],
            ['ldiskFreeMegabytes'],
            ['ldiskFreeMegabytes'],
        );

        while (defined $s->getnext($v)) {
            # Make sure we are still in relevant portion of MIB
            last if ($v->[1]->val !~ /^\.1\.3\.6\.1\.2\.1\.25\.2\.1\.4/);
            last if ($v->[0]->val =~ /Total/);

            $Descr = ( $v->[0]->val =~ /.*:.*:(\w+:)$/gi)[-1] ;
            $Percent = $v->[2]->val;
            $Freespace = $v->[4]->val * 1024 * 1024;   # megabytes to bytes

            push (@disklist,[$Freespace,$Percent,$Descr, "N/A"]);
            print STDERR "Using PERF MIB filesystem: $Descr, $Freespace,$Percent\n" if $DEBUG;
        };

        if (@disklist) {
            return @disklist;
        }
    }

    # Try UCD-SNMP .enterprises.ucdavis.dskTable.dskEntry MIB extension
    # Comes with UCD-SNMP / net-snmp
    if ($USEMIB =~ /ucd/i)
    {
        $v = SNMP::VarList->new (
            ['dskIndex'],
            ['dskPath'],
            ['dskPercent'],
            ['dskAvail'],
            ['dskDevice'],
            ['dskPercentNode'],
        );

        while (defined $s->getnext($v)) {
            last if ($v->[0]->tag !~ /dskIndex/); # end of MIB?

            my $instancenum= $v->[0]->iid; # what instance number?

            # check for partial fetches (like swap partition) that won't
            # return all the MIB entries
            if ($v->[2]->iid != $instancenum or
                $v->[3]->iid != $instancenum or
                $v->[5]->iid != $instancenum)
            {
                # ignore this instance and try to move on to next
                # we wouldn't need this if use-dummy-values really worked
                # (re-seeding every column at this instance number puts the
                # columns back in step for the following getnext)
                $v = SNMP::VarList->new (
                    ['dskIndex', $instancenum],
                    ['dskPath', $instancenum],
                    ['dskPercent', $instancenum],
                    ['dskAvail', $instancenum],
                    ['dskDevice', $instancenum],
                    ['dskPercentNode', $instancenum],
                );
                next;
            }

            $Descr = $v->[1]->val;
            $Percent = $v->[2]->val;
            $Freespace = $v->[3]->val;
            $Freespace *= 1024; #Convert from kbytes to bytes to make consistent
            $Type = $v->[4]->val;
            $InodePercent = $v->[5]->val;
            print STDERR "Found UCD MIB filesystem: Type=$Type, Descr=$Descr, Percent=$Percent, Freespace=$Freespace, InodePercent=$InodePercent\n" if $DEBUG;

            # Try to catch only local filesystems. This covers the
            # the basics, but probably should be configurable
            next unless ( $Type =~ m/\b(md|hd|wd|sd|ida|raid)/ ) ;

            print STDERR "Using UCD MIB filesystem: $Descr ($Type)\n" if $DEBUG;
            push (@disklist,[$Freespace,$Percent,$Descr, $InodePercent]);
        };

        if (@disklist) {
            return @disklist;
        }
    }

    # Nothing usable was found in any of the selected MIBs.

    #Check for errors
    if ($s->{ErrorNum}) {
        push (@HOSTS, $host);
        push (@ERRS, "$host: could not get SNMP info: " . $s->{ErrorStr});
        $RET = ($RET == 1) ? 1 : 2 ;
        return undef;
    }

    # Check for OID not found
    push (@HOSTS, $host);
    push (@ERRS, "$host: Disk space OIDs not found in MIB(s): $USEMIB");
    $RET = ($RET == 1) ? 1 : 2 ;
    return undef;
}
# format specifications, should be able to cut, paste and edit into a config file
#
# These formats are used by the bare "write" in the main loop when --list
# or --listall is given.  They read the file-scoped $host and $disk
# variables, which the main loops set for the host/filesystem currently
# being checked.
#
# STDOUT_TOP: column headings, printed at the top of the listing.
format STDOUT_TOP =
System Description % Used Free space Inode%
-------------------------------------------------------------------------------
.
# STDOUT: one line per filesystem -- host, description ($disk->[2]),
# percent used ($disk->[1]), free space converted from bytes to megabytes
# ($disk->[0]/1024/1024) and the inode percentage, or "N/A" when the MIB
# did not supply one.
format STDOUT =
@<<<<<<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<< @###.# % @#######.# mb @>>>>>>
$host, $disk->[2], $disk->[1], $disk->[0]/1024/1024, ( $disk->[3] ne "N/A" ? ($disk->[3] + 0) . "%" : "N/A")
.
|