[btrfs_device_stats] Make warning and critical configurable for all metrics per device.

This commit is contained in:
HaseHarald 2021-05-13 17:31:12 +02:00 committed by Lars Kruse
parent d45c84e59f
commit 8a2c0be5b2
1 changed files with 87 additions and 36 deletions

View File

@ -18,8 +18,9 @@ user root
You can optionally configure the warning and critical limits. By default warning
is set to 1 and critical is not set at all. You can set the limits either for
the entire plugin or per individual metric. The individual values take
precedence over the general ones. See the following example:
the entire plugin or per individual metric and down to a specific device. The
more specific values take precedence over the general ones.
See the following example:
[btrfs_device_stats]
user root
@ -27,6 +28,7 @@ env.warning 2
env.critical 4
env.flags_warning 23
env.read_errs_critical 42
env.generation_errs_a04f3d6b_438c_4b61_979b_e5fda7fb858c_1_warning 187
=head2 DEFAULT CONFIGURATION
@ -90,11 +92,14 @@ def munin_config(fs):
print("")
devices = fs.devices()
warning = os.getenv('warning', default=1)
critical = os.getenv('critical', default=False)
for this_device in devices:
# Set defaults
warning = os.getenv('warning', default="1")
critical = os.getenv('critical', default=False)
# Get device information
this_dev_info = fs.dev_info(this_device.devid)
this_dev_name = this_dev_info.path.replace('/dev/', '')
print("multigraph btrfs_device_stats_" + fsid + "." +
str(this_device.devid))
print("graph_args --base 1000 -l 0")
@ -103,51 +108,97 @@ def munin_config(fs):
print("graph_category disk")
print("graph_info This graph shows stats of devices used by btrfs")
# Labels and warning/critical values for Corruption Errors
this_corr_errs_warn = os.getenv('corruption_errs_warning',
default=warning)
this_corr_errs_warn = os.getenv('corruption_errs_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_corr_errs_warn)
this_corr_errs_crit = os.getenv('corruption_errs_critical',
default=critical)
this_corr_errs_crit = os.getenv('corruption_errs_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_corr_errs_crit)
print("corruption_errs.label Corruption Errors")
print("corruption_errs.warning " + os.getenv('corruption_errs_warning',
default=str(warning)))
if os.getenv('corruption_errs_critical', default=critical):
print("corruption_errs.critical " +
os.getenv('corruption_errs_critical',
default=str(critical)))
print("corruption_errs.warning " + this_corr_errs_warn)
if this_corr_errs_crit:
print("corruption_errs.critical " + this_corr_errs_crit)
# Labels and warning/critical values for Flush Errors
this_flush_errs_warn = os.getenv('flush_errs_warning', default=warning)
this_flush_errs_warn = os.getenv('flush_errs_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_flush_errs_warn)
this_flush_errs_crit = os.getenv('flush_errs_critical',
default=critical)
this_flush_errs_crit = os.getenv('flush_errs_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_flush_errs_crit)
print("flush_errs.label Flush Errors")
print("flush_errs.warning " + os.getenv('flush_errs_warning',
default=str(warning)))
if os.getenv('flush_errs_critical', default=critical):
print("flush_errs.critical " + os.getenv('flush_errs_critical',
default=str(critical)))
print("flush_errs.warning " + this_flush_errs_warn)
if this_flush_errs_crit:
print("flush_errs.critical " + this_flush_errs_crit)
# Labels and warning/critical values for Generation Errors
this_gen_errs_warn = os.getenv('generation_errs_warning',
default=warning)
this_gen_errs_warn = os.getenv('generation_errs_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_gen_errs_warn)
this_gen_errs_crit = os.getenv('generation_errs_critical',
default=critical)
this_gen_errs_crit = os.getenv('generation_errs_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_gen_errs_crit)
print("generation_errs.label Generation Errors")
print("generation_errs.warning " + os.getenv('generation_errs_warning',
default=str(warning)))
if os.getenv('generation_errs_critical', default=critical):
print("generation_errs.critical " +
os.getenv('generation_errs_critical',
default=str(critical)))
print("generation_errs.warning " + this_gen_errs_warn)
if this_gen_errs_crit:
print("generation_errs.critical " + this_gen_errs_crit)
# Labels and warning/critical values for Read Errors
this_read_errs_warn = os.getenv('read_errs_warning', default=warning)
this_read_errs_warn = os.getenv('read_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_read_errs_warn)
this_read_errs_crit = os.getenv('read_errs_critical', default=critical)
this_read_errs_crit = os.getenv('read_errs_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_read_errs_crit)
print("read_errs.label Read Errors")
print("read_errs.warning " + os.getenv('read_errs_warning',
default=str(warning)))
if os.getenv('read_errs_critical', default=critical):
print("read_errs.critical " + os.getenv('read_errs_critical',
default=str(critical)))
print("read_errs.warning " + this_read_errs_warn)
if this_read_errs_crit:
print("read_errs.critical " + this_read_errs_crit)
# Labels and warning/critical values for Write Errors
this_write_errs_warn = os.getenv('write_errs_warning', default=warning)
this_write_errs_warn = os.getenv('write_errs_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_write_errs_warn)
this_write_errs_crit = os.getenv('write_errs_critical',
default=critical)
this_write_errs_crit = os.getenv('write_errs_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_write_errs_crit)
print("write_errs.label Write Errors")
print("write_errs.warning " + os.getenv('write_errs_warning',
default=str(warning)))
if os.getenv('write_errs_critical', default=critical):
print("write_errs.critical " + os.getenv('write_errs_critical',
default=str(critical)))
print("write_errs.warning " + this_write_errs_warn)
if this_write_errs_crit:
print("write_errs.critical " + this_write_errs_crit)
print("nr_items.label Nr. of Items")
# Labels and warning/critical values for Flags
this_flags_warn = os.getenv('flags_warning', default=warning)
this_flags_warn = os.getenv('flags_' + fsid + "_" +
str(this_device.devid) + '_warning',
default=this_flags_warn)
this_flags_crit = os.getenv('flags_critical', default=critical)
this_flags_crit = os.getenv('flags_' + fsid + "_" +
str(this_device.devid) + '_critical',
default=this_flags_crit)
print("flags.label Nr. of Flags")
print("flags.warning " + os.getenv('flags_warning',
default=str(warning)))
if os.getenv('flags_critical', default=critical):
print("flags.critical " + os.getenv('flags_critical',
default=str(critical)))
print("flags.warning " + this_flags_warn)
if this_flags_crit:
print("flags.critical " + this_flags_crit)
print("")