implemented monitor-capacity flag for checking zpool capacity limits in the nagios monitoring format
This commit is contained in:
parent
01398789f6
commit
f961a9f447
|
@ -62,6 +62,11 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da
|
||||||
|
|
||||||
This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file.
|
This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file.
|
||||||
|
|
||||||
|
+ --monitor-capacity
|
||||||
|
|
||||||
|
This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file. The default limits are 80% for the warning and 95% for the critical state. Those can be overridden by providing them
|
||||||
|
along like '=80,95'.
|
||||||
|
|
||||||
+ --force-update
|
+ --force-update
|
||||||
|
|
||||||
This clears out sanoid's zfs snapshot listing cache. This is normally not needed.
|
This clears out sanoid's zfs snapshot listing cache. This is normally not needed.
|
||||||
|
|
146
sanoid
146
sanoid
|
@ -18,7 +18,8 @@ use Time::Local; # to parse dates in reverse
|
||||||
my %args = ("configdir" => "/etc/sanoid");
|
my %args = ("configdir" => "/etc/sanoid");
|
||||||
GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
|
GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
|
||||||
"monitor-health", "force-update", "configdir=s",
|
"monitor-health", "force-update", "configdir=s",
|
||||||
"monitor-snapshots", "take-snapshots", "prune-snapshots"
|
"monitor-snapshots", "take-snapshots", "prune-snapshots",
|
||||||
|
"monitor-capacity:s"
|
||||||
) or pod2usage(2);
|
) or pod2usage(2);
|
||||||
|
|
||||||
# If only config directory (or nothing) has been specified, default to --cron --verbose
|
# If only config directory (or nothing) has been specified, default to --cron --verbose
|
||||||
|
@ -52,6 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath );
|
||||||
if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); }
|
if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); }
|
||||||
if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
|
if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
|
||||||
if ($args{'monitor-health'}) { monitor_health(@params); }
|
if ($args{'monitor-health'}) { monitor_health(@params); }
|
||||||
|
if (defined($args{'monitor-capacity'})) { monitor_capacity(@params); }
|
||||||
if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
|
if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
|
||||||
|
|
||||||
if ($args{'cron'}) {
|
if ($args{'cron'}) {
|
||||||
|
@ -174,6 +176,57 @@ sub monitor_snapshots {
|
||||||
exit $errorlevel;
|
exit $errorlevel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
####################################################################################
|
||||||
|
####################################################################################
|
||||||
|
####################################################################################
|
||||||
|
|
||||||
|
# Nagios-style check of zpool capacity (--monitor-capacity[=wlimit[,climit]]).
#
# Collects the pool name (first '/'-separated component) of every key in
# $snapsbypath, asks check_zpool_capacity() about each pool, prints a single
# summary line and exits with the highest exit code that was returned.
#
# Arguments: the parameter list shared by the monitor_* subs
#            ($config, $snaps, $snapsbytype, $snapsbypath); only
#            $snapsbypath is used here.
# Reads:     $args{'monitor-capacity'} - "" keeps the default limits
#            (warn 80, crit 95); otherwise "warn" or "warn,crit".
# Does not return: ends in exit, or in die when a supplied limit is invalid.
sub monitor_capacity {
	my (undef, undef, undef, $snapsbypath) = @_;
	my %pools;
	my @messages;
	my $errlevel=0;

	my %capacitylimits = (
		"warn" => 80,
		"crit" => 95
	);

	# if provided, parse capacity limits
	if ($args{'monitor-capacity'} ne "") {
		my @values = split(',', $args{'monitor-capacity'});

		if (!check_capacity_limit($values[0])) {
			die "ERROR: invalid zpool capacity warning limit!\n";
		}
		$capacitylimits{"warn"} = $values[0];

		# the critical limit is optional; the default stays in place without it
		if (scalar @values > 1) {
			if (!check_capacity_limit($values[1])) {
				die "ERROR: invalid zpool capacity critical limit!\n";
			}
			$capacitylimits{"crit"} = $values[1];
		}
	}

	# the pool is the first path component of each configured dataset
	foreach my $path (keys %{ $snapsbypath }) {
		my @pool = split ('/',$path);
		$pools{$pool[0]}=1;
	}

	# sorted, so the status line is identical from one run to the next
	# instead of following hash order
	foreach my $pool (sort keys %pools) {
		my ($exitcode, $msg) = check_zpool_capacity($pool,\%capacitylimits);
		if ($exitcode > $errlevel) { $errlevel = $exitcode; }
		chomp $msg;
		push (@messages, $msg);
	}

	# index matches the exit code: 0 = OK, 1 = WARNING, 2 = CRITICAL
	my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** ');
	my $message = $warninglevels[$errlevel] . join (', ',@messages);
	print "$message\n";
	exit $errlevel;
}
|
||||||
|
|
||||||
####################################################################################
|
####################################################################################
|
||||||
####################################################################################
|
####################################################################################
|
||||||
####################################################################################
|
####################################################################################
|
||||||
|
@ -900,6 +953,70 @@ sub check_zpool() {
|
||||||
return ($ERRORS{$state},$msg);
|
return ($ERRORS{$state},$msg);
|
||||||
} # end check_zpool()
|
} # end check_zpool()
|
||||||
|
|
||||||
|
# Validates a single capacity limit taken from the command line.
#
# Argument: $value - candidate limit as a string; may be undef when the
#                    option value contained no field at that position.
# Returns:  1 when $value consists of digits only and lies in 1..100;
#           otherwise a bare return (undef in scalar context, empty list
#           in list context), so the result is false either way.
#
# Declared without a prototype: the sub takes one argument, and an empty
# "()" prototype would reject every call compiled after the definition.
sub check_capacity_limit {
	my $value = shift;

	# nothing supplied at all
	return unless defined $value;

	# digits only; \z rather than $ so a trailing newline is rejected too
	return unless $value =~ /^\d+\z/;

	# a percentage limit of 0 or above 100 makes no sense
	return if $value < 1 || $value > 100;

	return 1;
}
|
||||||
|
|
||||||
|
# Queries the used capacity of one zpool and classifies it Nagios-style.
#
# Arguments: $pool              - name of the zpool to query
#            $capacitylimitsref - hashref with the keys "warn" and "crit",
#                                 each a percentage
# Returns:   ($exitcode, $msg) where $exitcode is 0, 1 or 2 (OK, WARNING,
#            CRITICAL) and $msg is "<STATE> ZPOOL <pool> : <cap>\n".
# Exits:     with 3 (UNKNOWN) when the zpool command cannot be started, and
#            with 2 (CRITICAL) when its output is not a percentage; in both
#            cases a message is printed first and the sub does not return.
#
# Declared without a prototype: the sub takes two arguments, which an empty
# "()" prototype would contradict.
sub check_zpool_capacity {
	my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
	my $state="UNKNOWN";
	my $msg="FAILURE";

	my $pool=shift;
	my $capacitylimitsref=shift;
	my %capacitylimits=%$capacitylimitsref;

	# only used for the error message below; the command itself is run
	# in list form
	my $statcommand="/sbin/zpool list -H -o cap $pool";

	# list-form pipe open with a lexical handle: no shell is involved, so
	# $pool always reaches zpool as exactly one argument
	my $stat;
	if (! open($stat, '-|', '/sbin/zpool', 'list', '-H', '-o', 'cap', $pool)) {
		print ("$state '$statcommand' command returns no result!\n");
		exit $ERRORS{$state};
	}

	my $line = <$stat>;
	close($stat);

	# the command may print nothing at all; treat that as an empty value so
	# the validity check below reports it without uninitialized warnings
	$line = '' unless defined $line;
	chomp $line;
	my @row = split(/ +/, $line);
	my $cap = defined $row[0] ? $row[0] : '';

	## check for valid capacity value
	if ($cap !~ m/^[0-9]{1,3}%$/ ) {
		$state = "CRITICAL";
		$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
		print $state, " ", $msg;
		exit ($ERRORS{$state});
	}

	# check capacity: strip the trailing '%' to compare numerically
	(my $capn = $cap) =~ s/\D//g;

	# the critical limit takes precedence over the warning limit
	if ($capn >= $capacitylimits{"crit"}) {
		$state = "CRITICAL";
	}
	elsif ($capn >= $capacitylimits{"warn"}) {
		$state = "WARNING";
	}
	else {
		$state = "OK";
	}

	$msg = sprintf "ZPOOL %s : %s\n", $pool, $cap;
	$msg = "$state $msg";
	return ($ERRORS{$state},$msg);
} # end check_zpool_capacity()
|
||||||
|
|
||||||
######################################################################################################
|
######################################################################################################
|
||||||
######################################################################################################
|
######################################################################################################
|
||||||
######################################################################################################
|
######################################################################################################
|
||||||
|
@ -1070,19 +1187,20 @@ Assumes --cron --verbose if no other arguments (other than configdir) are specif
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
|
|
||||||
--configdir=DIR Specify a directory to find config file sanoid.conf
|
--configdir=DIR Specify a directory to find config file sanoid.conf
|
||||||
|
|
||||||
--cron Creates snapshots and purges expired snapshots
|
--cron Creates snapshots and purges expired snapshots
|
||||||
--verbose Prints out additional information during a sanoid run
|
--verbose Prints out additional information during a sanoid run
|
||||||
--readonly Simulates creation/deletion of snapshots
|
--readonly Simulates creation/deletion of snapshots
|
||||||
--quiet Suppresses non-error output
|
--quiet Suppresses non-error output
|
||||||
--force-update Clears out sanoid's zfs snapshot cache
|
--force-update Clears out sanoid's zfs snapshot cache
|
||||||
|
|
||||||
--monitor-health Reports on zpool "health", in a Nagios compatible format
|
--monitor-health Reports on zpool "health", in a Nagios compatible format
|
||||||
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format
|
--monitor-capacity[=wlimit[,climit]] Reports on zpool capacity, in a Nagios compatible format
|
||||||
--take-snapshots Creates snapshots as specified in sanoid.conf
|
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format
|
||||||
--prune-snapshots Purges expired snapshots as specified in sanoid.conf
|
--take-snapshots Creates snapshots as specified in sanoid.conf
|
||||||
|
--prune-snapshots Purges expired snapshots as specified in sanoid.conf
|
||||||
|
|
||||||
--help Prints this helptext
|
--help Prints this helptext
|
||||||
--version Prints the version number
|
--version Prints the version number
|
||||||
--debug Prints out a lot of additional information during a sanoid run
|
--debug Prints out a lot of additional information during a sanoid run
|
||||||
|
|
Loading…
Reference in New Issue