Merge pull request #201 from phreaker0/monitor-capacity
implemented monitor-capacity flag for checking zpool capacity limits
This commit is contained in:
commit
8ee41752c7
|
@ -64,6 +64,10 @@ Which would be enough to tell sanoid to take and keep 36 hourly snapshots, 30 da
|
|||
|
||||
This option is designed to be run by a Nagios monitoring system. It reports on the health of the zpool your filesystems are on. It only monitors filesystems that are configured in the sanoid.conf file.
|
||||
|
||||
+ --monitor-capacity
|
||||
|
||||
This option is designed to be run by a Nagios monitoring system. It reports on the capacity of the zpool your filesystems are on. It only monitors pools that are configured in the sanoid.conf file.
|
||||
|
||||
+ --force-update
|
||||
|
||||
This clears out sanoid's zfs snapshot listing cache. This is normally not needed.
|
||||
|
|
128
sanoid
128
sanoid
|
@ -18,7 +18,8 @@ use Time::Local; # to parse dates in reverse
|
|||
my %args = ("configdir" => "/etc/sanoid");
|
||||
GetOptions(\%args, "verbose", "debug", "cron", "readonly", "quiet",
|
||||
"monitor-health", "force-update", "configdir=s",
|
||||
"monitor-snapshots", "take-snapshots", "prune-snapshots"
|
||||
"monitor-snapshots", "take-snapshots", "prune-snapshots",
|
||||
"monitor-capacity"
|
||||
) or pod2usage(2);
|
||||
|
||||
# If only config directory (or nothing) has been specified, default to --cron --verbose
|
||||
|
@ -52,6 +53,7 @@ my @params = ( \%config, \%snaps, \%snapsbytype, \%snapsbypath );
|
|||
if ($args{'debug'}) { $args{'verbose'}=1; blabber (@params); }
|
||||
if ($args{'monitor-snapshots'}) { monitor_snapshots(@params); }
|
||||
if ($args{'monitor-health'}) { monitor_health(@params); }
|
||||
if ($args{'monitor-capacity'}) { monitor_capacity(@params); }
|
||||
if ($args{'force-update'}) { my $snaps = getsnaps( \%config, $cacheTTL, 1 ); }
|
||||
|
||||
if ($args{'cron'}) {
|
||||
|
@ -174,6 +176,61 @@ sub monitor_snapshots {
|
|||
exit $errorlevel;
|
||||
}
|
||||
|
||||
|
||||
####################################################################################
|
||||
####################################################################################
|
||||
####################################################################################
|
||||
|
||||
# Nagios-style capacity check for every zpool referenced in the configuration.
#
# Arguments (the caller passes the shared @params list):
#   $config      - hashref of config sections keyed by dataset path; each
#                  section is read for 'capacity_warn' and 'capacity_crit'
#   $snaps, $snapsbytype, $snapsbypath
#                - accepted so the sub can be called like the other
#                  monitor_* subs; not used here
#
# Prints a single summary line and exits with the highest status code
# returned by check_zpool_capacity (0 = OK, 1 = WARNING, 2 = CRITICAL).
# Dies when a section carries an invalid capacity limit.
sub monitor_capacity {
    my ($config, $snaps, $snapsbytype, $snapsbypath) = @_;
    my %pools;
    my @messages;
    my $errlevel = 0;

    # maps the limit name to the word used in the error message
    my %limitlabel = ('warn' => 'warning', 'crit' => 'critical');

    # build pool list with corresponding capacity limits;
    # sections are visited in sorted order so the result does not depend
    # on hash ordering
    foreach my $section (sort keys %{$config}) {
        my @pool = split ('/', $section);

        # a section naming the pool itself is always evaluated; a child
        # dataset only supplies limits while the pool has none recorded
        next unless (scalar @pool == 1 || !defined($pools{$pool[0]}));

        my %capacitylimits;

        foreach my $level ('warn', 'crit') {
            my $limit = $config->{$section}{"capacity_$level"};

            if (!check_capacity_limit($limit)) {
                die "ERROR: invalid zpool capacity $limitlabel{$level} limit!\n";
            }

            # a limit of 0 means "do not check this level"
            if ($limit != 0) {
                $capacitylimits{$level} = $limit;
            }
        }

        # only record pools that have at least one active limit
        if (%capacitylimits) {
            $pools{$pool[0]} = \%capacitylimits;
        }
    }

    # query each pool and keep the worst status seen
    foreach my $pool (sort keys %pools) {
        my ($exitcode, $msg) = check_zpool_capacity($pool, $pools{$pool});
        if ($exitcode > $errlevel) { $errlevel = $exitcode; }
        chomp $msg;
        push (@messages, $msg);
    }

    # prefix is indexed by the final status code
    my @warninglevels = ('','*** WARNING *** ','*** CRITICAL *** ');
    my $message = $warninglevels[$errlevel] . join (', ',@messages);
    print "$message\n";
    exit $errlevel;
}
|
||||
|
||||
####################################################################################
|
||||
####################################################################################
|
||||
####################################################################################
|
||||
|
@ -900,6 +957,74 @@ sub check_zpool() {
|
|||
return ($ERRORS{$state},$msg);
|
||||
} # end check_zpool()
|
||||
|
||||
# Validate a capacity limit value taken from the configuration.
#
# Takes a single value and returns 1 when it is defined, consists solely
# of ASCII digits and lies in the range 0..100. Returns a false value
# (undef in scalar context) otherwise.
#
# Declared without a prototype: the sub takes an argument, so an empty
# "()" prototype would be wrong for every caller.
sub check_capacity_limit {
    my $value = shift;

    # digits only: rejects undef, empty strings, signs, decimals and
    # trailing newlines (\z, not $)
    return unless defined($value) && $value =~ /^[0-9]+\z/;

    # a percentage cannot exceed 100; negatives are already excluded by
    # the digits-only match above
    return if $value > 100;

    return 1;
}
|
||||
|
||||
# Query the used capacity of one zpool and grade it against the limits.
#
# Arguments:
#   $pool              - name of the zpool to query
#   $capacitylimitsref - hashref with optional 'warn' and 'crit' keys holding
#                        percentage thresholds; a missing key disables that
#                        level
#
# Returns a two-element list: (status code, message), where the status code
# is 0 (OK), 1 (WARNING) or 2 (CRITICAL) and the message has the form
# "<STATE> ZPOOL <pool> : <cap>\n".
#
# Does not return when the command cannot be started (prints an UNKNOWN line
# and exits 3) or when its output is not a percentage (prints a CRITICAL
# line and exits 2).
sub check_zpool_capacity {
    my %ERRORS=('DEPENDENT'=>4,'UNKNOWN'=>3,'OK'=>0,'WARNING'=>1,'CRITICAL'=>2);
    my $state="UNKNOWN";

    my $pool=shift;
    my $capacitylimitsref=shift;
    # tolerate a missing limits hash instead of dying on an undef deref
    my %capacitylimits=%{ $capacitylimitsref || {} };

    # the command is kept as a list so the pool name is handed to zpool as
    # a single argument and never interpreted by a shell
    my @statcommand=('/sbin/zpool', 'list', '-H', '-o', 'cap', $pool);
    # string form is only used for the error message below
    my $statcommand=join(' ', @statcommand);

    my $stat;
    if (! open $stat, '-|', @statcommand) {
        print ("$state '$statcommand' command returns no result!\n");
        exit $ERRORS{$state};
    }

    # only the first line of output is of interest
    my $line = <$stat>;
    close($stat);

    # the command may have printed nothing; treat that as an empty line so
    # it falls through to the "not responding" branch without warnings
    $line = '' unless defined $line;
    chomp $line;
    my @row = split(/ +/, $line);
    my $cap = defined $row[0] ? $row[0] : '';

    ## check for valid capacity value
    if ($cap !~ m/^[0-9]{1,3}%$/ ) {
        $state = "CRITICAL";
        my $errmsg = sprintf "ZPOOL {%s} does not exist and/or is not responding!\n", $pool;
        print $state, " ", $errmsg;
        exit ($ERRORS{$state});
    }

    $state="OK";

    # check capacity: strip the percent sign to get a plain number
    my $capn = $cap;
    $capn =~ s/\D//g;

    if (defined($capacitylimits{"warn"}) && $capn >= $capacitylimits{"warn"}) {
        $state = "WARNING";
    }

    # evaluated after the warning check so CRITICAL takes precedence
    if (defined($capacitylimits{"crit"}) && $capn >= $capacitylimits{"crit"}) {
        $state = "CRITICAL";
    }

    my $msg = sprintf "ZPOOL %s : %s\n", $pool, $cap;
    $msg = "$state $msg";
    return ($ERRORS{$state},$msg);
} # end check_zpool_capacity()
|
||||
|
||||
######################################################################################################
|
||||
######################################################################################################
|
||||
######################################################################################################
|
||||
|
@ -1079,6 +1204,7 @@ Options:
|
|||
--force-update Clears out sanoid's zfs snapshot cache
|
||||
|
||||
--monitor-health Reports on zpool "health", in a Nagios compatible format
|
||||
--monitor-capacity Reports on zpool capacity, in a Nagios compatible format
|
||||
--monitor-snapshots Reports on snapshot "health", in a Nagios compatible format
|
||||
--take-snapshots Creates snapshots as specified in sanoid.conf
|
||||
--prune-snapshots Purges expired snapshots as specified in sanoid.conf
|
||||
|
|
|
@ -70,3 +70,7 @@ monthly_warn = 32
|
|||
monthly_crit = 35
|
||||
yearly_warn = 0
|
||||
yearly_crit = 0
|
||||
|
||||
# default limits for capacity checks (if set to 0, limit will not be checked)
|
||||
capacity_warn = 80
|
||||
capacity_crit = 95
|
||||
|
|
Loading…
Reference in New Issue