Merge pull request #201 from phreaker0/monitor-capacity

implemented monitor-capacity flag for checking zpool capacity limits
This commit is contained in:
Jim Salter 2018-06-28 16:03:25 -04:00 committed by GitHub
commit 8ee41752c7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 135 additions and 1 deletions

View File

@ -64,6 +64,10 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da
This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file.
+ --monitor-capacity
This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file.
+ --force-update
This clears out sanoid's zfs snapshot listing cache. This is normally not needed.

128
sanoid
View File

@ -18,7 +18,8 @@ use Time::Local; # to parse dates in reverse
my %args = ("configdir" => "/etc/sanoid");
GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
"monitor-health", "force-update", "configdir=s",
"monitor-snapshots", "take-snapshots", "prune-snapshots"
"monitor-snapshots", "take-snapshots", "prune-snapshots",
"monitor-capacity"
) or pod2usage(2);
# If only config directory (or nothing) has been specified, default to --cron --verbose
@ -52,6 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath );
if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); }
if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
if ($args{'monitor-health'}) { monitor_health(@params); }
if ($args{'monitor-capacity'}) { monitor_capacity(@params); }
if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
if ($args{'cron'}) {
@ -174,6 +176,61 @@ sub monitor_snapshots {
exit $errorlevel;
}
####################################################################################
####################################################################################
####################################################################################
sub monitor_capacity {
my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
my %pools;
my @messages;
my $errlevel=0;
# build pool list with corresponding capacity limits
foreach my $section (keys %config) {
my @pool = split ('/',$section);
if (scalar @pool == 1 || !defined($pools{$pool[0]}) ) {
my %capacitylimits;
if (!check_capacity_limit($config{$section}{'capacity_warn'})) {
die "ERROR: invalid zpool capacity warning limit!\n";
}
if ($config{$section}{'capacity_warn'} != 0) {
$capacitylimits{'warn'} = $config{$section}{'capacity_warn'};
}
if (!check_capacity_limit($config{$section}{'capacity_crit'})) {
die "ERROR: invalid zpool capacity critical limit!\n";
}
if ($config{$section}{'capacity_crit'} != 0) {
$capacitylimits{'crit'} = $config{$section}{'capacity_crit'};
}
if (%capacitylimits) {
$pools{$pool[0]} = \%capacitylimits;
}
}
}
foreach my $pool (keys %pools) {
my $capacitylimitsref = $pools{$pool};
my ($exitcode, $msg) = check_zpool_capacity($pool,\%$capacitylimitsref);
if ($exitcode > $errlevel) { $errlevel = $exitcode; }
chomp $msg;
push (@messages, $msg);
}
my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** ');
my $message = $warninglevels[$errlevel] . join (', ',@messages);
print "$message\n";
exit $errlevel;
}
####################################################################################
####################################################################################
####################################################################################
@ -900,6 +957,74 @@ sub check_zpool() {
return ($ERRORS{$state},$msg);
} # end check_zpool()
sub check_capacity_limit() {
my $value = shift;
if (!defined($value) || $value !~ /^\d+\z/) {
return undef;
}
if ($value < 0 || $value > 100) {
return undef;
}
return 1
}
sub check_zpool_capacity() {
my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
my $state="UNKNOWN";
my $msg="FAILURE";
my $pool=shift;
my $capacitylimitsref=shift;
my %capacitylimits=%$capacitylimitsref;
my $statcommand="/sbin/zpool list -H -o cap $pool";
if (! open STAT, "$statcommand|") {
print ("$state '$statcommand' command returns no result!\n");
exit $ERRORS{$state};
}
my $line = <STAT>;
close(STAT);
chomp $line;
my @row = split(/ +/, $line);
my $cap=$row[0];
## check for valid capacity value
if ($cap !~ m/^[0-9]{1,3}%$/ ) {
$state = "CRITICAL";
$msg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
print $state, " ", $msg;
exit ($ERRORS{$state});
}
$state="OK";
# check capacity
my $capn = $cap;
$capn =~ s/\D//g;
if (defined($capacitylimits{"warn"})) {
if ($capn >= $capacitylimits{"warn"}) {
$state = "WARNING";
}
}
if (defined($capacitylimits{"crit"})) {
if ($capn >= $capacitylimits{"crit"}) {
$state = "CRITICAL";
}
}
$msg = sprintf "ZPOOL %s : %s\n", $pool, $cap;
$msg = "$state $msg";
return ($ERRORS{$state},$msg);
} # end check_zpool_capacity()
######################################################################################################
######################################################################################################
######################################################################################################
@ -1079,6 +1204,7 @@ Options:
--force-update Clears out sanoid's zfs snapshot cache
--monitor-health Reports on zpool "health", in a Nagios compatible format
--monitor-capacity Reports on zpool capacity, in a Nagios compatible format
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format
--take-snapshots Creates snapshots as specified in sanoid.conf
--prune-snapshots Purges expired snapshots as specified in sanoid.conf

View File

@ -70,3 +70,7 @@ monthly_warn = 32
monthly_crit = 35
yearly_warn = 0
yearly_crit = 0
# default limits for capacity checks (if set to 0, limit will not be checked)
capacity_warn = 80
capacity_crit = 95