#!/usr/bin/env python3
# Provenance: this script was carried inside a git format-patch
# (commit 0d39b909581ae474b7941e2cc2f7c3946a748b3d, Michael Grote,
# 2021-09-13, subject "docker_ added") that created the file `docker_`.
# The mail/diff envelope and the leading `+` markers are not part of the
# plugin and have been dropped.
"""
=head1 NAME

docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host.

This wildcard plugin provides series C<containers>, C<images>, C<status>,
C<volumes>, C<cpu>, C<memory> and C<network> as separate graphs. It also
supports a C<multi> suffix that provides all of those as a multigraph.

=head1 INSTALLATION

- Copy this plugin in your munin plugins directory
- Install Python3 "docker" package

=over 2

If you want all the graphs as a multigraph, create a single multi symlink.

    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_multi

Or choose a subset of those you want.

    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_containers
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status
    ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes

=back

After the installation you need to restart your munin-node:

=over 2

    systemctl restart munin-node

=back

=head1 CONFIGURATION

This plugin needs to run as root. You need to create a file named docker placed in the
directory /etc/munin/plugin-conf.d/ with the following config (you can also use
Docker environment variables here as described in
https://docs.docker.com/compose/reference/envvars/):

You can use the EXCLUDE_CONTAINER_NAME environment variable to specify a regular expression
which if matched will exclude the matching containers from the container graphs
(containers, cpu, memory, network and status).

For example

    env.EXCLUDE_CONTAINER_NAME runner

Would exclude all containers with the word "runner" in the name.


=over 2

    [docker_*]
    group docker
    env.DOCKER_HOST unix://run/docker.sock
    env.EXCLUDE_CONTAINER_NAME regexp

=back

You may need to pick a different group depending on the name schema of your
distribution.  Or maybe use "user root", if nothing else works.

=head1 AUTHORS

This section has been reverse-engineered from git logs

Codimp: original rewrite

Rowan Wookey: performance improvement

Olivier Mehani: Network support, ClientWrapper, general cleanup, multigraph

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf suggest multigraph

=cut
"""

import os
import re
import sys
from functools import wraps
try:
    from functools import cached_property
except ImportError:
    # If cached_property is not available,
    # just use the property decorator, without caching
    # This is for backward compatibility with Python<3.8
    cached_property = property
from multiprocessing import Process, Queue


# One entry per field of the "status" series, in output order:
# (field name / container state, warning threshold or None, info text).
# "unhealthy" is not a Docker status of its own; it is derived from the health
# check of running containers in print_containers_status().
_STATUS_FIELDS = (
    ('running', None,
     "Running containers can be manipulated with "
     "`docker container [attach|kill|logs|pause|restart|stop] <container>` or "
     "commands run in them with `docker container exec "
     "[--detach|--interactive,--privileged,--tty] <container> <command>`"),
    ('unhealthy', 1,
     "Unhealthy containers can be restarted with "
     "`docker container restart <container>`"),
    ('paused', None,
     "Paused containers can be resumed with "
     "`docker container unpause <container>`"),
    ('created', None,
     "New containers can be created with "
     "`docker container create --name <container> <image>` or "
     "`docker container run --name <container> <image> <command>`"),
    ('restarting', None,
     "Containers can be restarted with "
     "`docker container restart <container>`"),
    ('removing', None,
     "Containers can be removed with "
     "`docker container rm <container>`"),
    ('exited', None,
     "Exited containers can be started with "
     "`docker container start [--attach] <container>`"),
    ('dead', 1,
     "Dead containers can be started with "
     "`docker container start <container>`"),
)


def _creation_date(resource):
    """Return the creation timestamp stored in the attrs of a Docker resource.

    'CreatedAt' is preferred when present, 'Created' is used otherwise.
    """
    attrs = resource.attrs
    return attrs['CreatedAt'] if 'CreatedAt' in attrs else attrs['Created']


def sorted_by_creation_date(func):
    """Decorate *func* so that the resources it returns are sorted by creation date."""
    @wraps(func)
    def sorted_func(*args, **kwargs):
        return sorted(func(*args, **kwargs), key=_creation_date)
    return sorted_func


def clean_fieldname(text):
    """Turn *text* (a container name) into a valid Munin field name.

    Every character outside [A-Za-z0-9_] and a leading digit are replaced by
    an underscore.
    """
    if text == "root":
        # "root" is a magic (forbidden) word
        return "_root"
    return re.sub(r"(^[^A-Za-z_]|[^A-Za-z0-9_])", "_", text)


class ClientWrapper:
    """
    A small wrapper for the docker client, to centralise some parsing logic,
    and support caching.

    In addition, when the exclude_re parameter is not None,
    any container whose name is matched by the RE will be excluded from reports.
    """
    client = None
    exclude = None

    def __init__(self, client, exclude_re=None):
        self.client = client
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    @property
    def api(self):
        """The low-level API object of the wrapped client."""
        return self.client.api

    def _is_excluded(self, name):
        """Tell whether the container *name* matches the exclusion pattern."""
        return bool(self.exclude and self.exclude.search(name))

    @cached_property
    @sorted_by_creation_date
    def containers_any_status(self):
        """Non-excluded containers in any state (running, exited, paused, ...)."""
        return [
            c for c in self.client.containers.list(all=True)
            if not self._is_excluded(c.name)
        ]

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        """Non-excluded containers that are currently running.

        Despite its name this only returns running containers; the name is
        kept because the per-container series rely on it.
        """
        return [c for c in self.containers_any_status if c.status == 'running']

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        """Images that are neither regular nor dangling images.

        The three image fields are stacked and totalled as "All images", so
        dangling images must not be counted here a second time.
        """
        return list(
            set(self.all_images)
            .difference(self.images)
            .difference(self.dangling_images)
        )

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        """Every image returned by the client when listing with all=True."""
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        """Images from the default listing, minus the dangling ones."""
        return list(
            set(self.client.images.list())
            .difference(self.dangling_images)
        )

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        """Images returned by the client for the filter dangling=True."""
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        """Every volume returned by the client."""
        return self.client.volumes.list()


def container_summary(container, *args):
    """Return "<name> (<attributes>)" for *container* (see container_attributes)."""
    summary = container.name
    attributes = container_attributes(container, *args)
    if attributes:
        summary += f' ({attributes})'
    return summary


def container_attributes(container, *args):
    """Return the image tags, creation date and any extra *args*, comma-separated."""
    # Work on a copy so the list handed out by the image object is never modified.
    attributes = list(container.image.tags)
    attributes.append(container.attrs['Created'])
    return ', '.join(attributes + list(args))


def print_containers_status(client):
    """Print the number (and a summary) of containers in each state.

    Running containers whose health check reports "unhealthy" are counted as
    unhealthy instead of running. Containers in a state that has no field are
    ignored.
    """
    by_state = {name: [] for name, _warning, _info in _STATUS_FIELDS}
    # The status series is about every state, so it cannot use
    # client.all_containers, which only holds running containers.
    for container in client.containers_any_status:
        state = container.status
        if state == 'running':
            details = client.api.inspect_container(container.name)['State']
            if details.get('Health', {}).get('Status') == 'unhealthy':
                state = 'unhealthy'
        if state in by_state:
            by_state[state].append(container)
    for state, members in by_state.items():
        print(f'{state}.value', len(members))
        print(f'{state}.extinfo', ', '.join(container_summary(c) for c in members))


def image_summary(image):
    """Return "<short id> (<tags>, <creation date>, <size> MiB)" for *image*."""
    # Work on a copy so the list handed out by the image object is never modified.
    attributes = list(image.tags)
    attributes.append(image.attrs['Created'])
    attributes.append(f"{round(image.attrs['Size']/1024**2, 2)} MiB")
    return f"{image.short_id} ({', '.join(attributes)})"


def print_images_count(client):
    """Print the number (and a summary) of intermediate, regular and dangling images."""
    groups = (
        ('intermediate_quantity', client.intermediate_images),
        ('images_quantity', client.images),
        ('dangling_quantity', client.dangling_images),
    )
    for field, members in groups:
        print(f'{field}.value', len(members))
        print(f'{field}.extinfo', ', '.join(image_summary(i) for i in members))


def get_container_stats(container, q):
    """Worker: put one stats sample of *container* on the queue *q*.

    None is put when the stats cannot be read, so that the parent process
    never waits for a result that will not come.
    """
    try:
        stats = container.stats(stream=False)
    except Exception as error:  # outermost frame of the worker process
        print(f'could not read stats of {container.name}: {error}', file=sys.stderr)
        stats = None
    q.put(stats)


def parallel_container_stats(client):
    """Collect one stats sample per running container, one worker process each.

    Returns (container, stats) pairs; containers whose stats could not be read
    are left out.
    """
    workers = []
    for container in client.all_containers:
        queue = Queue()
        worker = Process(target=get_container_stats, args=(container, queue))
        worker.start()
        workers.append((container, worker, queue))

    stats = {}
    for container, worker, queue in workers:
        # Read the result *before* joining: a process that has put data on a
        # queue may not terminate until that data has been consumed.
        result = queue.get()
        worker.join()
        if result is not None:
            stats[container] = result
    return stats.items()


def _cpu_percent(stats, cpu_count):
    """Compute the CPU usage in percent from one stats sample.

    0.0 is returned when the sample lacks the counters needed for the
    computation or when the system counter did not advance.
    """
    try:
        current = stats['cpu_stats']
        previous = stats['precpu_stats']
        cpu_delta = (float(current['cpu_usage']['total_usage'])
                     - float(previous['cpu_usage']['total_usage']))
        system_delta = (float(current['system_cpu_usage'])
                        - float(previous['system_cpu_usage']))
    except (KeyError, TypeError):
        return 0.0
    if system_delta > 0.0:
        return cpu_delta / system_delta * 100.0 * cpu_count
    return 0.0


def print_containers_cpu(client):
    """Print the CPU usage (in percent) of every running container."""
    # os.cpu_count() may return None when the count cannot be determined.
    cpu_count = os.cpu_count() or 1
    for container, stats in parallel_container_stats(client):
        fieldname = clean_fieldname(container.name)
        print(fieldname + '.value', _cpu_percent(stats, cpu_count))
        print(fieldname + '.extinfo', container_attributes(container))


def _memory_usage(stats):
    """Return (value, description) of the memory used according to *stats*.

    The value is 'U' (unknown, in Munin's protocol) when the sample holds no
    memory figure at all.
    """
    memory = stats.get('memory_stats') or {}
    details = memory.get('stats') or {}
    if 'total_rss' in details:  # cgroupv1 only?
        return details['total_rss'], 'Resident Set Size'
    return memory.get('usage', 'U'), 'Total memory usage'


def print_containers_memory(client):
    """Print the memory usage of every running container."""
    for container, stats in parallel_container_stats(client):
        memory_usage, extinfo = _memory_usage(stats)
        fieldname = clean_fieldname(container.name)
        print(fieldname + '.value', memory_usage)
        print(fieldname + '.extinfo', container_attributes(container, extinfo))


def print_containers_network(client):
    """Print the bytes sent and received by every running container.

    The counters of all the networks of a container are added up.
    """
    for container, stats in parallel_container_stats(client):
        interfaces = (stats.get('networks') or {}).values()
        tx_bytes = sum(data['tx_bytes'] for data in interfaces)
        rx_bytes = sum(data['rx_bytes'] for data in interfaces)
        fieldname = clean_fieldname(container.name)
        print(fieldname + '_up.value', tx_bytes)
        print(fieldname + '_down.value', rx_bytes)
        print(fieldname + '_up.extinfo', container_attributes(container))


def volume_summary(volume):
    """Return the short id of *volume*, followed by its label names if any."""
    summary = f"{volume.short_id}"
    labels = volume.attrs.get('Labels')
    if labels:
        summary += f" ({', '.join(labels)})"
    return summary


def status(client, mode):
    """Series "status": number of containers per state."""
    if mode != "config":
        print_containers_status(client)
        return
    print("graph_title Docker status")
    print("graph_vlabel containers")
    print("graph_category virtualization")
    print("graph_total All containers")
    for name, warning, info in _STATUS_FIELDS:
        print(f"{name}.label {name.upper()}")
        print(f"{name}.draw AREASTACK")
        if warning is not None:
            print(f"{name}.warning {warning}")
        print(f"{name}.info {info}")


def containers(client, mode):
    """Series "containers": number of running containers."""
    if mode == "config":
        print("graph_title Docker containers")
        print("graph_vlabel containers")
        print("graph_category virtualization")
        print("containers_quantity.label Containers")
    else:
        print('containers_quantity.value', len(client.all_containers))


def images(client, mode):
    """Series "images": number of intermediate, regular and dangling images."""
    if mode != "config":
        print_images_count(client)
        return
    print("graph_title Docker images")
    print("graph_vlabel images")
    print("graph_category virtualization")
    print("graph_total All images")
    print("intermediate_quantity.label Intermediate images")
    print("intermediate_quantity.draw AREASTACK")
    print("intermediate_quantity.info All unused images can be deleted with "
          "`docker image prune --all`")
    print("images_quantity.label Images")
    print("images_quantity.draw AREASTACK")
    print("images_quantity.info Images can be used in containers with "
          "`docker container create --name <container> <image>` or "
          "`docker container run --name <container> <image> <command>`")
    print("dangling_quantity.label Dangling images")
    print("dangling_quantity.draw AREASTACK")
    print("dangling_quantity.info Dangling images can be deleted with "
          "`docker image prune` "
          "or tagged with `docker image tag <image> <tag>`")
    print("dangling_quantity.warning 10")


def volumes(client, mode):
    """Series "volumes": number of volumes."""
    if mode == "config":
        print("graph_title Docker volumes")
        print("graph_vlabel volumes")
        print("graph_category virtualization")
        print("volumes_quantity.label Volumes")
        print("volumes_quantity.draw AREASTACK")
        print("volumes_quantity.info Unused volumes can be deleted with "
              "`docker volume prune`")
    else:
        print('volumes_quantity.value', len(client.volumes))
        print('volumes_quantity.extinfo', ', '.join(volume_summary(v) for v in client.volumes))


def _print_stacked_container_fields(client):
    """Print label, draw and info of one stacked field per running container."""
    for container in client.all_containers:
        fieldname = clean_fieldname(container.name)
        print(f"{fieldname}.label {container.name}")
        print(f"{fieldname}.draw AREASTACK")
        print(f"{fieldname}.info {container_attributes(container)}")


def cpu(client, mode):
    """Series "cpu": CPU usage per running container."""
    if mode != "config":
        print_containers_cpu(client)
        return
    # os.cpu_count() may return None when the count cannot be determined.
    graphlimit = str((os.cpu_count() or 1) * 100)
    print("graph_title Docker containers CPU usage")
    print("graph_args --base 1000 -r --lower-limit 0 --upper-limit " + graphlimit)
    print("graph_scale no")
    print("graph_period second")
    print("graph_vlabel CPU usage (%)")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container CPU usage.")
    print("graph_total Total CPU usage")
    _print_stacked_container_fields(client)


def network(client, mode):
    """Series "network": traffic per running container."""
    if mode != "config":
        print_containers_network(client)
        return
    print("graph_title Docker containers network usage")
    print("graph_args --base 1024 -l 0")
    print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container network usage.")
    print("graph_total Total network usage")
    for container in client.all_containers:
        fieldname = clean_fieldname(container.name)
        # The "down" field is not drawn on its own: it is attached to the
        # "up" field through "negative".
        print(f"{fieldname}_down.label {container.name}_received")
        print(f"{fieldname}_down.type DERIVE")
        print(f"{fieldname}_down.min 0")
        print(f"{fieldname}_down.graph no")
        print(f"{fieldname}_down.cdef {fieldname}_down,8,*")
        print(f"{fieldname}_up.label {container.name}")
        print(f"{fieldname}_up.draw LINESTACK1")
        print(f"{fieldname}_up.type DERIVE")
        print(f"{fieldname}_up.min 0")
        print(f"{fieldname}_up.negative {fieldname}_down")
        print(f"{fieldname}_up.cdef {fieldname}_up,8,*")
        print(f"{fieldname}_up.info {container_attributes(container)}")


def memory(client, mode):
    """Series "memory": memory usage per running container."""
    if mode != "config":
        print_containers_memory(client)
        return
    print("graph_title Docker containers memory usage")
    print("graph_args --base 1024 -l 0")
    print("graph_vlabel Bytes")
    print("graph_category virtualization")
    print("graph_info This graph shows docker container memory usage.")
    print("graph_total Total memory usage")
    _print_stacked_container_fields(client)


# Series name (the suffix of the symlink) -> function printing that series.
# The order is the order of the graphs in the multigraph output.
_SERIES = {
    'containers': containers,
    'cpu': cpu,
    'images': images,
    'memory': memory,
    'network': network,
    'status': status,
    'volumes': volumes,
}


def _wildcard_from_script_name(script_name):
    """Extract the series name from the name the plugin was invoked under.

    ".../docker_cpu" gives "cpu". An empty string is returned when the file
    name carries no suffix after "docker_" or does not contain "docker_".
    """
    # Only the file name matters: a directory containing "docker_" must not
    # be mistaken for the wildcard.
    _, _, suffix = os.path.basename(script_name).partition("docker_")
    return suffix.split("_")[0]


def main():
    """Entry point: handle autoconf/suggest, then print the requested series."""
    mode = sys.argv[1] if len(sys.argv) > 1 else ""
    wildcard = _wildcard_from_script_name(sys.argv[0])

    try:
        import docker
        client = docker.from_env()
        if mode == "autoconf":
            client.ping()
            print('yes')
            sys.exit(0)
    except Exception as e:
        if mode == "autoconf":
            # The answer to autoconf goes to stdout, with a successful exit.
            print(f'no ({e})')
            sys.exit(0)
        # Anything on stdout would be parsed as plugin output here.
        print(f'no ({e})', file=sys.stderr)
        sys.exit(1)

    if mode == "suggest":
        # The multigraph covers all other graphs,
        # so we only need to suggest one
        print("multi")
        sys.exit(0)

    client = ClientWrapper(client,
                           exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))

    if wildcard in _SERIES:
        _SERIES[wildcard](client, mode)
    elif wildcard == 'multi':
        for name, series in _SERIES.items():
            print(f'multigraph docker_{name}')
            series(client, mode)
    else:
        print(f'unknown series ({wildcard})', file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()