453 lines
15 KiB
Python
Executable File
453 lines
15 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# pylint: disable=invalid-name
|
|
# pylint: enable=invalid-name
|
|
|
|
"""Munin plugin to monitor Varnish status.
|
|
|
|
=head1 NAME
|
|
|
|
varnish - monitor Varnish status
|
|
|
|
=head1 APPLICABLE SYSTEMS
|
|
|
|
Systems running Varnish cache.
|
|
|
|
=head1 CONFIGURATION
|
|
|
|
You need to create a file named varnish placed in the directory
|
|
/etc/munin/plugin-conf.d/ with the following config:
|
|
|
|
=over 2
|
|
|
|
[varnish]
|
|
env.backend_unhealthy_warning :1
|
|
env.threads_destroyed_warning :1
|
|
env.threads_failed_warning :1
|
|
env.threads_warning :1
|
|
|
|
=back
|
|
|
|
=head1 NOTES
|
|
|
|
This plugin is tested on Varnish 4 but it should also work with v5 and v6.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Kim B. Heino <b@bbbs.net>
|
|
|
|
This is based heavily on varnish4_ plugin from Munin contrib
|
|
by Kristian Lyngstol <kristian@bohemians.org> / Redpill Linpro AS.
|
|
|
|
=head1 LICENSE
|
|
|
|
GPLv2
|
|
|
|
=head1 MAGIC MARKERS
|
|
|
|
#%# family=auto
|
|
#%# capabilities=autoconf
|
|
|
|
=cut
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
import os
|
|
|
|
|
|
def run_binary(arg):
    """Run an external command and return its standard output as text.

    Args:
        arg: Argument vector (program followed by its arguments). It is
            executed directly, without a shell.

    Returns:
        The command's stdout decoded as UTF-8, with undecodable bytes
        dropped. An empty string is returned when the command cannot be
        started or communicating with it fails with OSError. The exit
        status is not inspected.
    """
    try:
        # The context manager closes the pipe and reaps the child even if
        # communicate() fails. stderr is never used by callers, so it is
        # discarded instead of being buffered.
        with subprocess.Popen(
                arg, shell=False, close_fds=True, bufsize=-1,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL) as proc:
            outdata, _ = proc.communicate()
    except OSError:
        return ''
    return outdata.decode('utf-8', 'ignore')
|
|
|
|
|
|
def get_values():
    """Run varnishstat and parse its JSON output.

    Returns:
        A dict mapping counter names (e.g. ``MAIN.uptime``) to their
        description objects. An empty dict is returned when varnishstat
        produced no output or the output is not a JSON object.
    """
    output = run_binary(['/usr/bin/varnishstat', '-j'])
    try:
        stats = json.loads(output)
    except (TypeError, ValueError):
        return {}
    if not isinstance(stats, dict):
        return {}
    # Counters are normally nested under "counters". When that member is
    # missing, fall back to the top-level object instead of raising
    # KeyError.
    # NOTE(review): presumably older varnishstat releases emit the counters
    # at the top level - confirm against the versions you deploy.
    counters = stats.get('counters', stats)
    return counters if isinstance(counters, dict) else {}
|
|
|
|
|
|
def _print_lines(*lines):
    """Print every given configuration line on its own row."""
    for line in lines:
        print(line)


def _print_warning(field):
    """Print ``<field>.warning`` if env variable ``<field>_warning`` is set."""
    limit = os.getenv(field + '_warning')
    if limit is not None:
        print('{0}.warning {1}'.format(field, limit))


def _print_derive(field, label, warning=False):
    """Print label, ``min 0``, optional warning and DERIVE type of a field.

    ``min 0`` is always emitted so that a counter reset does not show up as
    a negative rate.
    """
    print('{0}.label {1}'.format(field, label))
    print('{0}.min 0'.format(field))
    if warning:
        _print_warning(field)
    print('{0}.type DERIVE'.format(field))


def config():
    """Print plugin config.

    Writes the Munin multigraph configuration for all Varnish graphs to
    standard output. Warning thresholds are read from the environment
    variables ``backend_unhealthy_warning``, ``threads_failed_warning``,
    ``threads_destroyed_warning`` and ``threads_warning``; a threshold line
    is printed only when the corresponding variable is set.
    """
    # pylint: disable=too-many-statements

    # backend_traffic
    _print_lines(
        'multigraph varnish_backend_traffic',
        'graph_category webserver',
        'graph_title Backend traffic',
    )
    _print_derive('backend_busy', 'Backend conn. too many')
    _print_derive('backend_conn', 'Backend conn. success')
    _print_derive('backend_retry', 'Backend conn. retry')
    _print_derive('backend_unhealthy', 'Backend conn. not attempted',
                  warning=True)
    _print_derive('backend_recycle', 'Backend conn. recycles')
    _print_derive('backend_fail', 'Backend conn. failures')
    _print_derive('backend_reuse', 'Backend conn. reuses')
    _print_derive('backend_req', 'Backend requests made')

    # bad
    _print_lines(
        'multigraph varnish_bad',
        'graph_category webserver',
        'graph_title Misbehavior',
    )
    _print_derive('sess_dropped', 'Sessions dropped')
    _print_derive('threads_failed', 'Thread creation failed')
    _print_derive('threads_destroyed', 'Threads destroyed')
    _print_lines(
        'thread_queue_len.label Length of session queue',
        'thread_queue_len.type GAUGE',
    )
    _print_derive('sc_pipe_overflow', 'Session pipe overflow')
    _print_derive('esi_warnings', 'ESI parse warnings (unlock)')
    _print_derive('sess_fail', 'Session accept failures')
    _print_derive('backend_busy', 'Backend conn. too many')
    _print_derive('esi_errors', 'ESI parse errors (unlock)')
    _print_derive('SMA_Transient_c_fail', 'Allocator failures SMA Transient')
    _print_derive('losthdr', 'HTTP header overflows')
    _print_derive('backend_unhealthy', 'Backend conn. not attempted')
    _print_derive('threads_limited', 'Threads hit max')
    _print_derive('fetch_failed', 'Fetch failed (all causes)')

    # expunge
    _print_lines(
        'multigraph varnish_expunge',
        'graph_category webserver',
        'graph_title Object expunging',
        'graph_order n_expired n_lru_nuked',
    )
    _print_derive('n_lru_nuked', 'Number of LRU nuked objects')
    _print_derive('n_expired', 'Number of expired objects')

    # hit_rate: the cdef lines turn each rate into a share of client_req
    _print_lines(
        'multigraph varnish_hit_rate',
        'graph_category webserver',
        'graph_title Hit rates',
        'graph_order client_req cache_hit cache_miss cache_hitpass',
        'graph_scale no',
        'graph_vlabel %',
        'graph_args -l 0 -u 100 --rigid',
        'client_req.label Good client requests received',
        'client_req.graph off',
        'client_req.min 0',
        'client_req.type DERIVE',
        'cache_miss.label Cache misses',
        'cache_miss.min 0',
        'cache_miss.draw STACK',
        'cache_miss.cdef cache_miss,client_req,/,100,*',
        'cache_miss.type DERIVE',
        'cache_hit.label Cache hits',
        'cache_hit.min 0',
        'cache_hit.draw AREA',
        'cache_hit.cdef cache_hit,client_req,/,100,*',
        'cache_hit.type DERIVE',
        'cache_hitpass.label Cache hits for pass',
        'cache_hitpass.min 0',
        'cache_hitpass.draw STACK',
        'cache_hitpass.cdef cache_hitpass,client_req,/,100,*',
        'cache_hitpass.type DERIVE',
    )

    # memory_usage
    _print_lines(
        'multigraph varnish_memory_usage',
        'graph_category webserver',
        'graph_title Memory usage',
        'graph_vlabel bytes',
        'graph_args --base 1024',
        'SMA_Transient_g_bytes.label Bytes outstanding SMA Transient',
        'SMA_Transient_g_bytes.type GAUGE',
        'SMA_Transient_g_space.label Bytes available SMA Transient',
        'SMA_Transient_g_space.type GAUGE',
    )
    _print_derive('SMA_Transient_c_bytes', 'Bytes allocated SMA Transient')

    # objects
    _print_lines(
        'multigraph varnish_objects',
        'graph_category webserver',
        'graph_title Number of objects',
        'graph_order n_object n_objectcore n_vampireobject n_objecthead',
        'n_object.label Number of objects',
        'n_object.type GAUGE',
        'n_vampireobject.label Number of unresurrected objects',
        'n_vampireobject.type GAUGE',
        'n_objectcore.label Number of object cores',
        'n_objectcore.type GAUGE',
        'n_objecthead.label Number of object heads',
        'n_objecthead.info Each object head can have one or more object '
        'attached, typically based on the Vary: header',
        'n_objecthead.type GAUGE',
    )

    # request_rate
    # NOTE(review): graph_order names client_conn, but the field defined in
    # this graph is sess_conn - kept unchanged, confirm the intended order.
    _print_lines(
        'multigraph varnish_request_rate',
        'graph_category webserver',
        'graph_title Request rates',
        'graph_order cache_hit cache_hitpass cache_miss backend_conn '
        'backend_unhealthy client_req client_conn',
        'client_req.label Good client requests received',
        'client_req.min 0',
        'client_req.colour 111111',
        'client_req.type DERIVE',
        's_pipe.label Total pipe sessions seen',
        's_pipe.min 0',
        's_pipe.colour 1d2bdf',
        's_pipe.type DERIVE',
        'sess_conn.label Sessions accepted',
        'sess_conn.graph ON',
        'sess_conn.min 0',
        'sess_conn.colour 444444',
        'sess_conn.type DERIVE',
        'cache_miss.label Cache misses',
        'cache_miss.min 0',
        'cache_miss.draw STACK',
        'cache_miss.colour FF0000',
        'cache_miss.type DERIVE',
        'backend_conn.label Backend conn. success',
        'backend_conn.min 0',
        'backend_conn.colour 995599',
        'backend_conn.type DERIVE',
        's_pass.label Total pass-ed requests seen',
        's_pass.min 0',
        's_pass.colour 785d0d',
        's_pass.type DERIVE',
        'backend_unhealthy.label Backend conn. not attempted',
        'backend_unhealthy.min 0',
        'backend_unhealthy.colour FF55FF',
        'backend_unhealthy.type DERIVE',
        'cache_hitpass.label Cache hits for pass',
        'cache_hitpass.min 0',
        'cache_hitpass.draw STACK',
        'cache_hitpass.colour FFFF00',
        'cache_hitpass.info Hitpass are cached passes: An entry in the '
        'cache instructing Varnish to pass. Typically achieved after a '
        'pass in vcl_fetch.',
        'cache_hitpass.type DERIVE',
        'cache_hit.label Cache hits',
        'cache_hit.min 0',
        'cache_hit.draw AREA',
        'cache_hit.colour 00FF00',
        'cache_hit.type DERIVE',
    )

    # threads
    _print_lines(
        'multigraph varnish_threads',
        'graph_category webserver',
        'graph_title Thread status',
    )
    _print_derive('threads_created', 'Threads created')
    _print_derive('threads_failed', 'Thread creation failed', warning=True)
    _print_derive('threads_destroyed', 'Threads destroyed', warning=True)
    _print_derive('threads_limited', 'Threads hit max')
    _print_lines(
        'threads.label Total number of threads',
        'threads.min 0',
    )
    _print_warning('threads')
    print('threads.type GAUGE')

    # transfer_rates: the cdef lines convert bytes to bits
    _print_lines(
        'multigraph varnish_transfer_rates',
        'graph_category webserver',
        'graph_title Transfer rates',
        'graph_order s_resp_bodybytes s_resp_hdrbytes',
        'graph_vlabel bit/s',
        'graph_args -l 0',
        's_resp_bodybytes.label Body traffic',
        's_resp_bodybytes.min 0',
        's_resp_bodybytes.draw AREA',
        's_resp_bodybytes.cdef s_resp_bodybytes,8,*',
        's_resp_bodybytes.type DERIVE',
        's_resp_hdrbytes.label Header traffic',
        's_resp_hdrbytes.min 0',
        's_resp_hdrbytes.draw STACK',
        's_resp_hdrbytes.cdef s_resp_hdrbytes,8,*',
        's_resp_hdrbytes.info HTTP Header traffic. TCP/IP overhead is not '
        'included.',
        's_resp_hdrbytes.type DERIVE',
    )

    # uptime: the cdef converts seconds to days
    _print_lines(
        'multigraph varnish_uptime',
        'graph_category webserver',
        'graph_title Varnish uptime',
        'graph_scale no',
        'graph_vlabel days',
        'uptime.label Management process uptime',
        'uptime.cdef uptime,86400,/',
        'uptime.type GAUGE',
    )
|
|
|
|
|
|
def _counter_value(data, name):
    """Return the ``value`` of counter *name*, or None when unavailable."""
    try:
        return data[name]['value']
    except (KeyError, TypeError):
        return None


def _print_value(field, value):
    """Print one ``<field>.value`` line; None is reported as ``U``."""
    print('{0}.value {1}'.format(field, 'U' if value is None else value))


def _print_main_values(data, fields):
    """Print the ``MAIN.<field>`` counter for every field, in order."""
    for field in fields:
        _print_value(field, _counter_value(data, 'MAIN.' + field))


def fetch():
    """Print values.

    Reads the counters via get_values() and writes one ``<field>.value``
    line per graphed field, grouped by multigraph. Nothing is printed when
    no counters could be read. A counter that is missing from the
    varnishstat data is reported as ``U`` (Munin's marker for an unknown
    value) instead of aborting the remaining graphs.
    """
    data = get_values()
    if not data:
        return

    # backend_traffic
    print('multigraph varnish_backend_traffic')
    _print_main_values(data, (
        'backend_busy',
        'backend_conn',
        'backend_retry',
        'backend_unhealthy',
        'backend_recycle',
        'backend_fail',
        'backend_reuse',
        'backend_req',
    ))

    # bad
    print('multigraph varnish_bad')
    _print_main_values(data, (
        'sess_dropped',
        'threads_failed',
        'threads_destroyed',
        'thread_queue_len',
        'sc_pipe_overflow',
        'esi_warnings',
        'sess_fail',
        'backend_busy',
        'esi_errors',
        'losthdr',
        'backend_unhealthy',
        'threads_limited',
        'fetch_failed',
    ))
    _print_value('SMA_Transient_c_fail',
                 _counter_value(data, 'SMA.Transient.c_fail'))

    # expunge
    print('multigraph varnish_expunge')
    _print_main_values(data, (
        'n_lru_nuked',
        'n_expired',
    ))

    # hit_rate: client_req is the sum of hits, misses and hit-for-pass so
    # that the percentages computed by the graph's cdef add up to 100.
    print('multigraph varnish_hit_rate')
    parts = [
        _counter_value(data, 'MAIN.' + field)
        for field in ('cache_hit', 'cache_miss', 'cache_hitpass')
    ]
    total = None if any(part is None for part in parts) else sum(parts)
    _print_value('client_req', total)
    _print_main_values(data, (
        'cache_miss',
        'cache_hit',
        'cache_hitpass',
    ))

    # memory_usage
    print('multigraph varnish_memory_usage')
    for suffix in ('g_bytes', 'g_space', 'c_bytes'):
        _print_value('SMA_Transient_' + suffix,
                     _counter_value(data, 'SMA.Transient.' + suffix))

    # objects
    print('multigraph varnish_objects')
    _print_main_values(data, (
        'n_object',
        'n_vampireobject',
        'n_objectcore',
        'n_objecthead',
    ))

    # request_rate
    print('multigraph varnish_request_rate')
    _print_main_values(data, (
        'client_req',
        's_pipe',
        'sess_conn',
        'cache_miss',
        'backend_conn',
        's_pass',
        'backend_unhealthy',
        'cache_hitpass',
        'cache_hit',
    ))

    # threads
    print('multigraph varnish_threads')
    _print_main_values(data, (
        'threads_created',
        'threads_failed',
        'threads_destroyed',
        'threads_limited',
        'threads',
    ))

    # transfer_rates
    print('multigraph varnish_transfer_rates')
    _print_main_values(data, (
        's_resp_bodybytes',
        's_resp_hdrbytes',
    ))

    # uptime
    print('multigraph varnish_uptime')
    _print_main_values(data, ('uptime',))
|
|
|
|
|
|
if __name__ == '__main__':
    # The first command line argument, if any, selects the Munin mode.
    # Slicing yields an empty list when no argument was given, so no
    # explicit length check is needed.
    if sys.argv[1:2] == ['autoconf']:
        print('yes' if get_values() else 'no (varnish is not running)')
    elif sys.argv[1:2] == ['config']:
        config()
    else:
        # No argument, or any other argument: report the current values.
        fetch()
|