process_group: monitor process group age, cpu, memory and count

Generic plugin to monitor configured processes. Multiple process
groups can be configured with regexps.
This commit is contained in:
Kim B. Heino 2023-12-11 11:50:40 +02:00
parent 48c4275851
commit e43e882527
1 changed files with 205 additions and 0 deletions

205
plugins/cpu/process_group Executable file
View File

@ -0,0 +1,205 @@
#!/usr/bin/env python3
"""Munin plugin to monitor process group process count, memory, cpu and age.
=head1 NAME
process_group - monitor process group process count, memory, cpu and age
=head1 APPLICABLE SYSTEMS
Linux systems.
=head1 CONFIGURATION
List of monitored process groups must be configured as regexps. Example:
[process_group]
env.group1 php-fpm: pool
env.group2 nginx
This will monitor two process groups: "php-fpm: pool" PHP application server
running any pool and "nginx" web server. Regexps must match "ps -eo command"
output.
=head1 AUTHOR
Kim B. Heino <b@bbbs.net>
=head1 LICENSE
GPLv2
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf
=cut
"""
import os
import re
import subprocess
import sys
import unicodedata
def safename(name):
    """Return a Munin-safe field name derived from ``name``.

    Accented letters are folded to their ASCII base letter, letters are
    lowercased and every other non-alphanumeric character is replaced with
    an underscore.  Munin field names must not start with a digit and must
    not be empty, so an underscore is prepended in those cases.
    """
    # Convert ä->a as isalpha('ä') is true
    value = unicodedata.normalize('NFKD', name)
    value = value.encode('ASCII', 'ignore').decode('ASCII')
    # Replace non-alphanumeric chars
    value = ''.join(char.lower() if char.isalnum() else '_' for char in value)
    # A leading digit or an empty string is not a valid field name
    if not value or value[0].isdigit():
        value = f'_{value}'
    return value
def run_binary(arg):
    """Execute the command given as an argument list and return its stdout.

    Output is decoded as UTF-8 with undecodable bytes dropped.  The exit
    status is not checked.  An empty string is returned when the executable
    cannot be found.
    """
    try:
        completed = subprocess.run(
            arg,
            check=False,
            stdout=subprocess.PIPE,
            errors='ignore',
            encoding='utf-8',
        )
    except FileNotFoundError:
        return ''
    return completed.stdout
def parse_config():
    """Return the list of group regexps from the plugin environment.

    Reads the environment variables group1, group2, ... in order and stops
    at the first one that is unset or empty.
    """
    found = []
    while True:
        # The next variable to read is always one past what we have so far
        pattern = os.environ.get(f'group{len(found) + 1}')
        if not pattern:
            return found
        found.append(pattern)
def parse_elapsed(text):
    """Convert the elapsed time field printed by ps to seconds.

    Accepted layouts:

    8-23:05:27    days-hours:minutes:seconds
    21:16:53      hours:minutes:seconds
    04:29         minutes:seconds

    Any other layout raises ValueError.
    """
    day_part, dash, clock = text.partition('-')
    if not dash:
        # No day prefix: the whole text is the clock part
        day_part, clock = '0', text

    fields = clock.split(':')
    if len(fields) == 3:
        hour_part, minute_part, second_part = fields
    else:
        hour_part = '0'
        # Raises ValueError unless the clock part is exactly "mm:ss"
        minute_part, second_part = fields

    return (int(second_part) +
            60 * int(minute_part) +
            3600 * int(hour_part) +
            86400 * int(day_part))
def collect_data():
    """Run ps and aggregate its output per configured process group.

    Returns a dict keyed by the configured group regexp.  Each value is a
    dict with:

    count     number of processes whose command matched the regexp
    cpu       sum of the ps %cpu column over those processes
    elapsed   sum of the ps etime column, converted to seconds
    rss       sum of the ps rss column multiplied by 1024

    A process is added to every group whose regexp matches its command.
    The ps header row and any row that cannot be parsed are skipped.
    """
    groups = parse_config()
    values = {
        group: {'count': 0, 'cpu': 0, 'elapsed': 0, 'rss': 0}
        for group in groups
    }
    # Compile each regexp once instead of once per process
    patterns = [(group, re.compile(group)) for group in groups]

    lines = run_binary(['ps', '-eo', '%cpu,etime,rss,command'])
    for line in lines.splitlines():
        fields = line.split(None, 3)
        if len(fields) != 4:
            # Blank line or a row without a command column
            continue
        cpu, elapsed, rss, command = fields
        try:
            cpu = float(cpu)
            elapsed = parse_elapsed(elapsed)
            rss = int(rss) * 1024
        except ValueError:
            # Header row ("%CPU ELAPSED RSS COMMAND") or unparsable numbers
            continue
        for group, pattern in patterns:
            if pattern.search(command):
                totals = values[group]
                totals['count'] += 1
                totals['cpu'] += cpu
                totals['elapsed'] += elapsed
                totals['rss'] += rss
    return values
def config():
    """Print the Munin configuration for all four multigraphs.

    Nothing is printed when no groups are configured.  When the
    MUNIN_CAP_DIRTYCONFIG environment variable is "1" the current values
    are printed right after the configuration.
    """
    groups = parse_config()
    if not groups:
        return

    # (graph name, title, vertical label, base, emit "graph_scale no")
    graphs = (
        ('process_group_age', 'Process group average age',
         'seconds', 1000, True),
        ('process_group_memory', 'Process group average memory',
         'bytes', 1024, False),
        ('process_group_cpu', 'Process group CPU usage',
         '%', 1000, True),
        ('process_group_count', 'Process group process count',
         'processes', 1000, True),
    )
    # Every graph lists the same fields, so build the label lines once
    labels = [f'{safename(group)}.label {group}' for group in groups]

    for graph, title, vlabel, base, unscaled in graphs:
        print(f'multigraph {graph}')
        print(f'graph_title {title}')
        print('graph_category processes')
        print(f'graph_vlabel {vlabel}')
        print(f'graph_args --base {base}')
        if unscaled:
            print('graph_scale no')
        for label in labels:
            print(label)

    if os.environ.get('MUNIN_CAP_DIRTYCONFIG') == '1':
        fetch()
def fetch():
    """Print the current values for all four multigraphs.

    Age and memory are reported as per-process averages (0 when a group has
    no processes); cpu and count are reported as group totals.  Nothing is
    printed when no data was collected.
    """
    data = collect_data()
    if not data:
        return

    def mean(stats, key):
        """Return stats[key] averaged over the process count, or 0."""
        count = stats['count']
        return stats[key] / count if count else 0

    # (multigraph header line, function picking the value to report)
    graphs = (
        ('multigraph process_group_age',
         lambda stats: mean(stats, 'elapsed')),
        ('multigraph process_group_memory',
         lambda stats: mean(stats, 'rss')),
        ('multigraph process_group_cpu',
         lambda stats: stats['cpu']),
        ('multigraph process_group_count',
         lambda stats: stats['count']),
    )
    for header, pick in graphs:
        print(header)
        for group, stats in data.items():
            print(f'{safename(group)}.value {pick(stats)}')
if __name__ == '__main__':
    # The first command line argument selects the action; default is fetch
    action = sys.argv[1] if len(sys.argv) > 1 else None
    if action == 'autoconf':
        if parse_config():
            print('yes')
        else:
            print('no (no groups configured)')
    elif action == 'config':
        config()
    else:
        fetch()