[plugin/docker_] Improvements to docker_ plugin (#1094)

* Add support for docker_network
* Actually show non running containers counts
* Show intermediate and dangling images counts
* Add warnings on dangling images and dead containers
* Fully support autoconf and suggest
* Add graph_total to the status (thereby making the containers graph
  redundant), CPU and Memory graphs
* Show most graphs as AREASTACK; this gives a clearer view of
  both individual and total use
* Use LINESTACK1 to work around munin-contrib/munin#1343
* Include all containers in CPU/Memory/Network config, so data from
  non-running containers is still displayed
* Sprinkle some info and extinfo
* Add ClientWrapper around docker module, providing caching and sorting
* Reverse-engineer author list from git log
This commit is contained in:
Olivier Mehani 2020-08-26 00:17:18 +10:00 committed by GitHub
parent 6052c56d94
commit 13d5b23442
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 284 additions and 39 deletions

View File

@ -5,7 +5,7 @@
docker_ - Docker wildcard-plugin to monitor a L<Docker|https://www.docker.com> host.
This wildcard plugin provides at the moment only the suffixes C<containers>, C<images>, C<status>,
C<volumes>, C<cpu> and C<memory>.
C<volumes>, C<cpu>, C<memory> and C<network>.
=head1 INSTALLATION
@ -18,6 +18,7 @@ C<volumes>, C<cpu> and C<memory>.
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_cpu
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_images
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_memory
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_network
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_status
ln -s /usr/share/munin/plugins/docker_ /etc/munin/plugins/docker_volumes
@ -56,45 +57,181 @@ Would exclude all containers with the word "runner" in the name.
env.EXCLUDE_CONTAINER_NAME regexp
=back
=head1 AUTHORS
This section has been reverse-engineered from git logs
* Codimp <contact@lithio.fr>: original rewrite
* Rowan Wookey <admin@rwky.net>: performance improvement
* Olivier Mehani <shtrom@ssji.net>: Network support, ClientWrapper, general
cleanup
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf suggest
"""
import os
import sys
import docker
import re
from functools import cached_property
from multiprocessing import Process, Queue
def sorted_by_creation_date(func):
    """
    Decorator sorting the iterable returned by `func` by creation date.

    Each returned item must expose an `attrs` mapping holding either a
    'CreatedAt' or a 'Created' entry; 'CreatedAt' takes precedence when both
    are present. The result is always a new list, in ascending order.
    """
    # Imported locally so this decorator does not depend on how the
    # module-level imports are laid out.
    from functools import wraps

    def creation_date(item):
        attrs = item.attrs
        return attrs['CreatedAt'] if 'CreatedAt' in attrs else attrs['Created']

    @wraps(func)  # keep the wrapped callable's name and docstring
    def sorted_func(*args, **kwargs):
        return sorted(func(*args, **kwargs), key=creation_date)

    return sorted_func
class ClientWrapper:
    """
    A small wrapper for the docker client, to centralise some parsing logic,
    and support caching.

    Every listing is exposed as a cached property: the underlying client is
    queried at most once per listing for the lifetime of the wrapper, and the
    result is passed through sorted_by_creation_date.

    In addition, when the exclude_re parameter is not None,
    any container whose name is matched by the RE will be excluded from reports.
    """
    client = None
    exclude = None

    def __init__(self, client, exclude_re=None):
        self.client = client
        if exclude_re:
            self.exclude = re.compile(exclude_re)

    def _is_reported(self, container):
        # A container is reported unless an exclusion RE is configured and
        # matches anywhere in its name.
        return not self.exclude or not self.exclude.search(container.name)

    @cached_property
    @sorted_by_creation_date
    def containers(self):
        # Listing returned by containers.list() with default arguments,
        # filtered like all_containers so that an excluded container never
        # shows up in one listing but not in the other.
        return [c for c in self.client.containers.list()
                if self._is_reported(c)]

    @cached_property
    @sorted_by_creation_date
    def all_containers(self):
        # Listing returned by containers.list(all=True), minus exclusions.
        return [c for c in self.client.containers.list(all=True)
                if self._is_reported(c)]

    @cached_property
    @sorted_by_creation_date
    def intermediate_images(self):
        # Everything in all_images that is not in (images minus dangling).
        return list(
            set(self.all_images)
            .difference(
                set(self.images)
                .difference(
                    set(self.dangling_images)
                )
            )
        )

    @cached_property
    @sorted_by_creation_date
    def all_images(self):
        return self.client.images.list(all=True)

    @cached_property
    @sorted_by_creation_date
    def images(self):
        # Default image listing, with the dangling images removed.
        images = self.client.images.list()
        return list(
            set(images)
            .difference(
                set(self.dangling_images))
        )

    @cached_property
    @sorted_by_creation_date
    def dangling_images(self):
        return self.client.images.list(filters={'dangling': True})

    @cached_property
    @sorted_by_creation_date
    def volumes(self):
        return self.client.volumes.list()
def container_summary(container):
    """
    Return a one-line description of a container.

    The description is the container name, followed by the string built by
    container_attributes() in parentheses when that string is not empty.
    """
    details = container_attributes(container)
    if not details:
        return container.name
    return f'{container.name} ({details})'
def container_attributes(container):
    """
    Return a comma-separated string of a container's image tags followed by
    its creation date (the 'Created' entry of container.attrs).

    The tag list is copied rather than appended to, so the list handed out by
    container.image.tags is never modified and repeated calls give the same
    result.
    """
    attributes = [*container.image.tags, container.attrs['Created']]
    return ', '.join(attributes)
def print_containers_status(client):
    """
    Print, for each known container status, the number of containers in that
    status and a summary of those containers.

    `client` must expose an `all_containers` iterable whose items have a
    `status` attribute. For every status, in a fixed order, two lines are
    printed: `<status>.value` with the container count, and
    `<status>.extinfo` with the comma-separated container_summary() of each
    container. Containers reporting any other status are ignored.
    """
    statuses = (
        'running',
        'paused',
        'created',
        'restarting',
        'removing',
        'exited',
        'dead',
    )
    by_status = {status: [] for status in statuses}

    for container in client.all_containers:
        bucket = by_status.get(container.status)
        if bucket is not None:
            bucket.append(container)

    # Iterate over the tuple, not the dict, to make the output order explicit.
    for status in statuses:
        containers = by_status[status]
        print(f'{status}.value', len(containers))
        print(f'{status}.extinfo',
              ', '.join(container_summary(c) for c in containers))
def image_summary(image):
    """
    Return a one-line description of an image.

    The description is the image short id followed, in parentheses, by its
    tags, its 'Created' attribute and its 'Size' attribute converted to MiB
    (size / 1024**2, rounded to two decimals).

    The tag list is copied rather than appended to, so the list handed out by
    image.tags is never modified and repeated calls give the same result.
    """
    size_mib = round(image.attrs['Size'] / 1024**2, 2)
    attributes = [*image.tags, image.attrs['Created'], f"{size_mib} MiB"]
    return f"{image.short_id} ({', '.join(attributes)})"
def print_images_count(client):
    """
    Print the count and the summaries of the intermediate, regular and
    dangling images exposed by `client`.

    For each group, a `.value` line carries the number of images and an
    `.extinfo` line carries the comma-separated image_summary() of each one.
    """
    regular = client.images
    layers = client.intermediate_images
    untagged = client.dangling_images

    report = (
        ('intermediate_quantity.value', 'intermediate_quantity.extinfo', layers),
        ('images_quantity.value', 'images_quantity.extinfo', regular),
        ('dangling_quantity.value', 'dangling_quantity.extinfo', untagged),
    )
    for value_field, extinfo_field, group in report:
        print(value_field, len(group))
        print(extinfo_field, ', '.join(image_summary(i) for i in group))
def get_container_stats(container, q):
@ -104,10 +241,7 @@ def get_container_stats(container, q):
def parallel_container_stats(client):
proc_list = []
stats = {}
exclude = os.getenv('EXCLUDE_CONTAINER_NAME')
for container in client.containers.list():
if exclude and re.search(exclude, container.name):
break
for container in client.containers:
q = Queue()
p = Process(target=get_container_stats, args=(container, q))
proc_list.append({'proc': p, 'queue': q, 'container': container})
@ -122,18 +256,39 @@ def print_containers_cpu(client):
for container, stats in parallel_container_stats(client):
cpu_count = len(stats["cpu_stats"]["cpu_usage"]["percpu_usage"])
cpu_percent = 0.0
cpu_delta = float(stats["cpu_stats"]["cpu_usage"]["total_usage"]) \
- float(stats["precpu_stats"]["cpu_usage"]["total_usage"])
system_delta = float(stats["cpu_stats"]["system_cpu_usage"]) \
- float(stats["precpu_stats"]["system_cpu_usage"])
cpu_delta = (float(stats["cpu_stats"]["cpu_usage"]["total_usage"])
- float(stats["precpu_stats"]["cpu_usage"]["total_usage"]))
system_delta = (float(stats["cpu_stats"]["system_cpu_usage"])
- float(stats["precpu_stats"]["system_cpu_usage"]))
if system_delta > 0.0:
cpu_percent = cpu_delta / system_delta * 100.0 * cpu_count
print(container.name + '.value', cpu_percent)
print(container.name + '.extinfo', container_attributes(container))
def print_containers_memory(client):
    """
    Print the memory usage of each container yielded by
    parallel_container_stats().

    The value is the 'total_rss' entry found under
    stats['memory_stats']['stats']; the extinfo is the string built by
    container_attributes().
    """
    for container, stats in parallel_container_stats(client):
        field = container.name
        total_rss = stats['memory_stats']['stats']['total_rss']
        print(field + '.value', total_rss)
        print(field + '.extinfo', container_attributes(container))
def print_containers_network(client):
    """
    Print the network traffic counters of each container yielded by
    parallel_container_stats().

    The 'tx_bytes' and 'rx_bytes' counters of every interface listed under
    stats['networks'] are summed and printed as `<name>_up.value` and
    `<name>_down.value` respectively. The string built by
    container_attributes() is printed as `<name>_up.extinfo`.
    """
    for container, stats in parallel_container_stats(client):
        # Stats may come without a 'networks' entry (presumably for
        # containers sharing the host network -- TODO confirm); report zero
        # traffic instead of failing with a KeyError.
        interfaces = (stats.get('networks') or {}).values()
        tx_bytes = sum(data['tx_bytes'] for data in interfaces)
        rx_bytes = sum(data['rx_bytes'] for data in interfaces)
        print(container.name + '_up.value', tx_bytes)
        print(container.name + '_down.value', rx_bytes)
        # Only <name>_up and <name>_down are declared as fields for this
        # graph, so the extinfo has to be attached to one of them.
        print(container.name + '_up.extinfo', container_attributes(container))
def volume_summary(volume):
    """
    Return a one-line description of a volume.

    The description is the volume short id, followed by its labels in
    parentheses when volume.attrs['Labels'] is not empty. Iterating the
    labels yields the label names when they are held in a mapping.
    """
    summary = f"{volume.short_id}"
    labels = volume.attrs['Labels']
    if labels:
        # This must be an f-string, otherwise the join expression is emitted
        # verbatim instead of the labels.
        summary += f" ({', '.join(labels)})"
    return summary
def main():
@ -143,28 +298,70 @@ def main():
mode = ""
wildcard = sys.argv[0].split("docker_")[1].split("_")[0]
try:
import docker
client = docker.from_env()
if mode == "autoconf":
client.ping()
print('yes')
sys.exit(0)
except Exception as e:
print(f'no ({e})')
if mode == "autoconf":
sys.exit(0)
sys.exit(1)
if mode == "suggest":
print("containers")
print("cpu")
print("images")
print("memory")
print("network")
print("status")
print("volumes")
sys.exit(0)
client = docker.from_env()
client = ClientWrapper(client,
exclude_re=os.getenv('EXCLUDE_CONTAINER_NAME'))
if wildcard == "status":
if mode == "config":
print("graph_title Docker status")
print("graph_vlabel containers")
print("graph_category virtualization")
print("graph_total All containers")
print("running.label RUNNING")
print("running.draw AREASTACK")
print("running.info Running containers can be manipulated with "
"`docker container [attach|kill|logs|pause|restart|stop] <NAME>` or "
"commands run in them with `docker container exec "
"[--detach|--interactive,--privileged,--tty] <NAME> <COMMAND>`"
)
print("paused.label PAUSED")
print("paused.draw AREASTACK")
print("paused.info Paused containers can be resumed with "
"`docker container unpause <NAME>`")
print("created.label CREATED")
print("created.draw AREASTACK")
print("created.info New containers can be created with "
"`docker container create --name <NAME> <IMAGE_ID >` or "
"`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
print("restarting.label RESTARTING")
print("restarting.draw AREASTACK")
print("restarting.info Containers can be restarted with "
"`docker container restart <NAME>`")
print("removing.label REMOVING")
print("removing.draw AREASTACK")
print("removing.info Containers can be removed with "
"`docker container rm <NAME>`")
print("exited.label EXITED")
print("exited.draw AREASTACK")
print("exited.info Exited containers can be started with "
"`docker container start [--attach] <NAME>`")
print("dead.label DEAD")
print("dead.draw AREASTACK")
print("dead.warning 1")
print("dead.info Dead containers can be started with "
"`docker container start <NAME>`")
else:
print_containers_status(client)
elif wildcard == "containers":
@ -174,23 +371,42 @@ def main():
print("graph_category virtualization")
print("containers_quantity.label Containers")
else:
print('containers_quantity.value', len(client.containers.list()))
print('containers_quantity.value', len(client.containers))
elif wildcard == "images":
if mode == "config":
print("graph_title Docker images")
print("graph_vlabel images")
print("graph_category virtualization")
print("graph_total All images")
print("intermediate_quantity.label Intermediate images")
print("intermediate_quantity.draw AREASTACK")
print("intermediate_quantity.info All unused images can be deleted with "
"`docker image prune --all`")
print("images_quantity.label Images")
print("images_quantity.draw AREASTACK")
print("images_quantity.info Images can be used in containers with "
"`docker container create --name <NAME> <IMAGE_ID >` or "
"`docker container run --name <NAME> <IMAGE_ID> <COMMAND>`")
print("dangling_quantity.label Dangling images")
print("dangling_quantity.draw AREASTACK")
print("dangling_quantity.info Dangling images can be deleted with "
"`docker image prune`"
"or tagged with `docker image tag <IMAGE_ID> <NAME>`")
print("dangling_quantity.warning 10")
else:
print('images_quantity.value', len(client.images.list()))
print_images_count(client)
elif wildcard == "volumes":
if mode == "config":
print("graph_title Docker volumes")
print("graph_vlabel volumes")
print("graph_category virtualization")
print("volumes_quantity.label Volumes")
print("volumes_quantity.draw AREASTACK")
print("volumes_quantity.info Unused volumes can be deleted with "
"`docker volume prune`")
else:
print('volumes_quantity.value', len(client.volumes.list()))
print('volumes_quantity.value', len(client.volumes))
print('volumes_quantity.extinfo', ', '.join(volume_summary(v) for v in client.volumes))
elif wildcard == "cpu":
if mode == "config":
graphlimit = str(os.cpu_count() * 100)
@ -201,8 +417,11 @@ def main():
print("graph_vlabel CPU usage (%)")
print("graph_category virtualization")
print("graph_info This graph shows docker container CPU usage.")
for container in client.containers.list():
print("graph_total Total CPU usage")
for container in client.all_containers:
print("{}.label {}".format(container.name, container.name))
print("{}.draw AREASTACK".format(container.name))
print("{}.info {}".format(container.name, container_attributes(container)))
else:
print_containers_cpu(client)
elif wildcard == "memory":
@ -212,10 +431,36 @@ def main():
print("graph_vlabel Bytes")
print("graph_category virtualization")
print("graph_info This graph shows docker container memory usage.")
for container in client.containers.list():
print("graph_total Total memory usage")
for container in client.all_containers:
print("{}.label {}".format(container.name, container.name))
print("{}.draw AREASTACK".format(container.name))
print("{}.info {}".format(container.name, container_attributes(container)))
else:
print_containers_memory(client)
elif wildcard == "network":
if mode == "config":
print("graph_title Docker containers network usage")
print("graph_args --base 1024 -l 0")
print("graph_vlabel bits in (-) / out (+) per ${graph_period}")
print("graph_category virtualization")
print("graph_info This graph shows docker container network usage.")
print("graph_total Total network usage")
for container in client.all_containers:
print("{}_down.label {}_received".format(container.name, container.name))
print("{}_down.type DERIVE".format(container.name))
print("{}_down.min 0".format(container.name))
print("{}_down.graph no".format(container.name))
print("{}_down.cdef {}_down,8,*".format(container.name, container.name))
print("{}_up.label {}".format(container.name, container.name))
print("{}_up.draw LINESTACK1".format(container.name))
print("{}_up.type DERIVE".format(container.name))
print("{}_up.min 0".format(container.name))
print("{}_up.negative {}_down".format(container.name, container.name))
print("{}_up.cdef {}_up,8,*".format(container.name, container.name))
print("{}_up.info {}".format(container.name, container_attributes(container)))
else:
print_containers_network(client)
if __name__ == '__main__':