lxc_guests: Enhanced and fixed lxc_guests plugin (#1371)

* lxc_guests2: Enhanced and fixed lxc_guests plugin

- Working with systemd and cgroup version 2 (tested on Debian bullseye and Debian bookworm)
- Fixed processes (with cgroup version 2)
- Simplified labels
- Simplified memory usage graph
- Added CPU usage in percent (using systemd-cgtop)
- Added Tasks
- No cgrouppath guessing

* Rename lxc_guests2 to lxc_guests to replace old plugin

---------

Co-authored-by: Sebastian L <sl@momou.ch>
This commit is contained in:
brknkfr 2023-04-29 17:38:44 +00:00 committed by GitHub
parent 181e964c49
commit 46a483ed36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 102 additions and 79 deletions

View File

@ -11,11 +11,18 @@ lxc_guests - collect statistics about containers virtualized via LXC
[lxc_guests] [lxc_guests]
user root user root
group root
# The memory usage of containers is by default drawn as stacked area # The memory usage of containers is by default drawn as stacked area
# charts. Alternatively a non-stacked graph with lines can be configured. # charts. Alternatively a non-stacked graph with lines can be configured.
# Default: true # Default: true
#env.ram_display_stacked true #env.ram_display_stacked true
# The cpu usage in percent of containers is by default drawn as stacked
# area charts. Alternatively a non-stacked graph with lines can be
# configured.
# Default: true
#env.cpu_usage_stacked true
# lxc container path, default below # lxc container path, default below
#env.lxcpath /var/lib/lxc #env.lxcpath /var/lib/lxc
@ -24,32 +31,20 @@ lxc_guests - collect statistics about containers virtualized via LXC
# (default none excluded) # (default none excluded)
#env.exclude container1 container2 #env.exclude container1 container2
# path where tasks sysfs files are stored,
# set this if the various attempts in the
# code don't work
# (default none)
#env.cgrouppath /sys/fs/cgroup/cpuacct/lxc/
=head1 INTERPRETATION =head1 INTERPRETATION
This plugin needs root privilege. This version of the plugin replaces the old lxc_guests plugin and works
with newer lxc versions, with cgroup version 2 and systemd (systemd-cgtop is
used to get cpu usage). Use an older revision of this plugin for systems with
cgroup version 1.
This plugin has been tested with lxc 3 and This plugin needs root (user and group) privilege.
lx2 (on Debian buster and Debian jessie,
respectively).
If using lxc 2, make sure you do not have cruft This plugin has been tested with lxc 4 and lxc 5 (on Debian bullseye and
in your container config files, you can test Debian bookworm, respectively).
it with:
lxc-cgroup -o /dev/stdout -l INFO -n 104 cpuacct.usage
-- with 104 a valid lxc instance), if you
get a warning, fix the config file.
For the logins graph, the "users" command is required in each
container.
Tested on Debian buster and Debian jessie.
For the logins graph, the "users" command is required in each container and
user/group has to be set to root for lxc-attach.
=head1 AUTHOR =head1 AUTHOR
@ -57,6 +52,7 @@ vajtsz vajtsz@gmail.com
mitty mitty@mitty.jp mitty mitty@mitty.jp
alphanet schaefer@alphanet.ch (many changes and multigraph) alphanet schaefer@alphanet.ch (many changes and multigraph)
Lars Kruse <devel@sumpfralle.de> Lars Kruse <devel@sumpfralle.de>
Sebastian L. <https://momou.ch>
=head1 LICENSE =head1 LICENSE
@ -80,9 +76,7 @@ lxcpath=${lxcpath:-/var/lib/lxc}
# containers to be ignored # containers to be ignored
exclude=${exclude:-} exclude=${exclude:-}
ram_display_stacked=${ram_display_stacked:-true} ram_display_stacked=${ram_display_stacked:-true}
# try to guess the location, if empty cpu_usage_display_stacked=${cpu_usage_display_stacked:-true}
cgrouppath=${cgrouppath:-}
# --- FUNCTIONS # --- FUNCTIONS
@ -104,51 +98,21 @@ get_active_guests() {
get_lxc_cgroup_info() { get_lxc_cgroup_info() {
local guest_name="$1" local guest_name="$1"
local field="$2" local field="$2"
# lxc3 (lxc < 3: may output some warnings if there is cruft in your config dir) lxc-cgroup -o /dev/stdout -l INFO -n "$guest_name" "$field" | grep -v set_config_idmaps
lxc-cgroup -o /dev/stdout -l INFO -n "$guest_name" "$field" | sed 's/^.*lxc_cgroup.c:main:[0-9][0-9]* - //' | grep -v set_config_idmaps
} }
# find proper sysfs and count it
# Debian 6.0: /sys/fs/cgroup/<container>/tasks
# Ubuntu 12.04 with fstab: /sys/fs/cgroup/lxc/<container>/tasks
# Ubuntu 12.04 with cgroup-lite: /sys/fs/cgroup/cpuacct/lxc/<container>/tasks
# Ubuntu 12.04 with cgroup-bin: /sys/fs/cgroup/cpuacct/sysdefault/lxc/<container>/tasks
# Ubuntu 14.04 /sys/fs/cgroup/systemd/lxc/<container>/tasks
# and with cgmanager on jessie
lxc_count_processes () { lxc_count_processes () {
local guest_name="$1" local guest_name="$1"
local SYSFS local processes
[ -z "$guest_name" ] && return 0 [ -z "$guest_name" ] && return 0
if [ -n "$cgrouppath" ]; then processes=$(find /sys/fs/cgroup/lxc.payload."$guest_name"/ -name cgroup.procs -exec cat {} \; | wc -l)
SYSFS="$cgrouppath/$guest_name/tasks" if [ -n "$processes" ]; then
if [ -e "$SYSFS" ]; then echo "$processes"
wc -l <"$SYSFS"
return
fi
fi fi
for SYSFS in \
"/sys/fs/cgroup/$guest_name/tasks" \
"/sys/fs/cgroup/lxc/$guest_name/tasks" \
"/sys/fs/cgroup/cpuacct/lxc/$guest_name/tasks" \
"/sys/fs/cgroup/systemd/lxc/$guest_name/tasks" \
"/sys/fs/cgroup/cpuacct/sysdefault/lxc/$guest_name/tasks" \
"/sys/fs/cgroup/cpu/lxc.payload.$guest_name/tasks"
do
if [ -e "$SYSFS" ]; then
wc -l <"$SYSFS"
return
fi
done
if [ -e /usr/bin/cgm ]; then
cgm getvalue cpu "lxc/$guest_name" tasks 2>/dev/null | wc -l
else
get_lxc_cgroup_info "$guest_name" "tasks" | wc -l
fi
} }
@ -173,7 +137,7 @@ do_autoconf() {
do_config() { do_config() {
local active_guests guest_name draw_style local active_guests guest_name draw_style NCPU graphlimit
active_guests=$(get_active_guests "$exclude") active_guests=$(get_active_guests "$exclude")
cat <<EOF cat <<EOF
@ -201,30 +165,64 @@ EOF
multigraph lxc_cpu_time multigraph lxc_cpu_time
graph_title CPU time graph_title CPU time
graph_args -l 0 --base 1000 graph_args -l 0 --base 1000
graph_vlabel nanosec graph_vlabel milisec
graph_category virtualization graph_category virtualization
EOF EOF
for guest_name in $active_guests for guest_name in $active_guests
do do
cat <<EOF cat <<EOF
$(clean_fieldname "cpu_time_${guest_name}").label $guest_name: CPU time $(clean_fieldname "cpu_time_${guest_name}").label $guest_name
$(clean_fieldname "cpu_time_${guest_name}").type DERIVE $(clean_fieldname "cpu_time_${guest_name}").type DERIVE
$(clean_fieldname "cpu_time_${guest_name}").min 0 $(clean_fieldname "cpu_time_${guest_name}").min 0
EOF EOF
done done
NCPU=$(grep -cE '^cpu[0-9]+ ' /proc/stat)
graphlimit=$((NCPU * 100))
cat <<EOF
multigraph lxc_cpu_usage
graph_title CPU Usage (%)
graph_args -l 0 -u $graphlimit -r
graph_vlabel %
graph_scale no
graph_category virtualization
EOF
for guest_name in $active_guests
do
if [ "$cpu_usage_display_stacked" != "true" ]; then
draw_style="LINE1"
else
draw_style="AREASTACK"
fi
cat <<EOF
$(clean_fieldname "cpu_usage_${guest_name}").label $guest_name
$(clean_fieldname "cpu_usage_${guest_name}").min 0
$(clean_fieldname "cpu_usage_${guest_name}").draw $draw_style
EOF
done
cat <<EOF cat <<EOF
multigraph lxc_logins multigraph lxc_logins
graph_title Logins graph_title Logins
graph_args -l 0 --base 1000
graph_vlabel logins
graph_category virtualization graph_category virtualization
graph_info This graph shows currently logged in users.
EOF EOF
for guest_name in $active_guests for guest_name in $active_guests
do do
cat <<EOF cat <<EOF
$(clean_fieldname "logins_${guest_name}").label $guest_name: logins $(clean_fieldname "logins_${guest_name}").label $guest_name
$(clean_fieldname "logins_${guest_name}").min 0
$(clean_fieldname "logins_${guest_name}").type GAUGE $(clean_fieldname "logins_${guest_name}").type GAUGE
EOF EOF
done done
@ -278,9 +276,26 @@ EOF
for guest_name in $active_guests for guest_name in $active_guests
do do
cat <<EOF cat <<EOF
$(clean_fieldname "lxc_proc_${guest_name}").label $guest_name: processes $(clean_fieldname "lxc_proc_${guest_name}").label $guest_name
$(clean_fieldname "lxc_proc_${guest_name}").type GAUGE $(clean_fieldname "lxc_proc_${guest_name}").type GAUGE
$(clean_fieldname "lxc_proc_${guest_name}").min 0 $(clean_fieldname "lxc_proc_${guest_name}").min 0
EOF
done
cat <<EOF
multigraph lxc_task
graph_title Tasks
graph_args -l 0 --base 1000
graph_vlabel Number of tasks
graph_category virtualization
EOF
for guest_name in $active_guests
do
cat <<EOF
$(clean_fieldname "lxc_task_${guest_name}").label $guest_name
$(clean_fieldname "lxc_task_${guest_name}").type GAUGE
$(clean_fieldname "lxc_task_${guest_name}").min 0
EOF EOF
done done
@ -302,37 +317,41 @@ EOF
fi fi
cat <<EOF cat <<EOF
$(clean_fieldname "mem_usage_${guest_name}").label ${guest_name}: Mem usage $(clean_fieldname "mem_usage_${guest_name}").label $guest_name
$(clean_fieldname "mem_usage_${guest_name}").type GAUGE $(clean_fieldname "mem_usage_${guest_name}").type GAUGE
$(clean_fieldname "mem_usage_${guest_name}").draw $draw_style $(clean_fieldname "mem_usage_${guest_name}").draw $draw_style
$(clean_fieldname "mem_cache_${guest_name}").label ${guest_name}: Cache
$(clean_fieldname "mem_cache_${guest_name}").type GAUGE
$(clean_fieldname "mem_active_${guest_name}").label ${guest_name}: Active
$(clean_fieldname "mem_active_${guest_name}").type GAUGE
$(clean_fieldname "mem_inactive_${guest_name}").label ${guest_name}: Inactive
$(clean_fieldname "mem_inactive_${guest_name}").type GAUGE
EOF EOF
done done
} }
do_fetch() { do_fetch() {
local active_guests cpu_usage device value_up value_down local active_guests cpu_usage cpu_usage_value device value_up value_down systemd_cgtop
active_guests=$(get_active_guests "$exclude") active_guests=$(get_active_guests "$exclude")
# Percentage is only shown after multiple iterations of systemd-cgtop
systemd_cgtop=$(systemd-cgtop -b -n4 -d 250ms)
echo "multigraph lxc_cpu" echo "multigraph lxc_cpu"
for guest_name in $active_guests for guest_name in $active_guests
do do
for cpu_usage in user system for cpu_usage in user system
do do
echo "$(clean_fieldname "cpu_${cpu_usage}_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "cpuacct.stat" | grep "$cpu_usage" | awk '{ print $2; }')" echo "$(clean_fieldname "cpu_${cpu_usage}_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "cpu.stat" | grep "$cpu_usage" | awk '{ print $2; }')"
done done
done done
echo "multigraph lxc_cpu_time" echo "multigraph lxc_cpu_time"
for guest_name in $active_guests for guest_name in $active_guests
do do
echo "$(clean_fieldname "cpu_time_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "cpuacct.usage")" echo "$(clean_fieldname "cpu_time_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "cpu.stat" | grep "usage_usec" | awk '{ print $2; }')"
done
echo "multigraph lxc_cpu_usage"
for guest_name in $active_guests
do
cpu_usage_value="U"
cpu_usage_value=$(echo "$systemd_cgtop" | grep "lxc.payload.$guest_name " | awk '{ print $3; }' | grep -E '[0-9].' | tail -1)
echo "$(clean_fieldname "cpu_usage_${guest_name}").value $cpu_usage_value"
done done
echo "multigraph lxc_logins" echo "multigraph lxc_logins"
@ -365,14 +384,17 @@ EOF
echo "$(clean_fieldname "lxc_proc_${guest_name}").value $(lxc_count_processes "$guest_name")" echo "$(clean_fieldname "lxc_proc_${guest_name}").value $(lxc_count_processes "$guest_name")"
done done
echo "multigraph lxc_task"
for guest_name in $active_guests
do
echo "$(clean_fieldname "lxc_task_${guest_name}").value $(cat /sys/fs/cgroup/lxc.payload."$guest_name"/pids.current)"
done
echo "multigraph lxc_ram" echo "multigraph lxc_ram"
for guest_name in $active_guests for guest_name in $active_guests
do do
cat <<EOF cat <<EOF
$(clean_fieldname "mem_usage_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "memory.usage_in_bytes") $(clean_fieldname "mem_usage_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "memory.current")
$(clean_fieldname "mem_cache_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "memory.stat" | grep total_cache | awk '{print $2;}')
$(clean_fieldname "mem_active_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "memory.stat" | grep total_active_anon | awk '{print $2;}')
$(clean_fieldname "mem_inactive_${guest_name}").value $(get_lxc_cgroup_info "$guest_name" "memory.stat" | grep total_inactive_anon | awk '{print $2;}')
EOF EOF
done done
} }
@ -396,3 +418,4 @@ case "${1:-}" in
echo >&2 "Invalid action requested (none of: autoconf / config / '')" echo >&2 "Invalid action requested (none of: autoconf / config / '')"
exit 1 exit 1
esac esac