munin-contrib/plugins/moinmoin/moinmoin_pages

#!/usr/bin/env python3
"""
=head1 NAME

moinmoin_pages - lists the number of pages in all wikis of a MoinMoin wikifarm

ACL-protected pages are included.

=head1 CONFIGURATION

  [moinmoin_*]
  user www

=head1 IMPLEMENTATION NOTES

The plugin is quite Koumbit-specific:

=over 4

=item 1. the wikifarm config is hardcoded

=item 2. "wikilist.py" is assumed to contain the list of wiki -> URL patterns

=item 3. the URL patterns are assumed to be simple enough that they can be decoded back into a plain URL (see the sketch after this list)

=back
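
Each entry of the "wikis" list this plugin iterates over is a (name,
url-regexp) pair, so the configured list looks something like this
(hypothetical values):

    wikis = [
        ("mywiki", r"^(wiki.example.org|www.example.org)/.*$"),
    ]
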
Also note that this plugin reuses code from MoinMoin/wikimacro.py's SystemInfo macro.

Finally, I tried using XML-RPC instead of the native functions to fetch the data, but it ended up
being slower. For the record, here is what the getPageList() call would have looked like:

  xmlrpclib.ServerProxy("http://wiki.koumbit.net/?action=xmlrpc2").getAllPages()

The quick benchmark I did yielded these results for the getAllPages() vs getPageList() calls:

  xmlrpc: 2.35 real 0.12 user 0.04 sys
  native: 1.44 real 1.07 user 0.35 sys

So the native plugin spends more time on the CPU (all of its time, actually), but it gets the job
done faster. It is quite possible that the CPU time saved by XML-RPC is in fact spent by the server.

=head1 AUTHORS

Copyleft 2007, The Anarcat <anarcat@koumbit.org>

=head1 LICENSE

Licensed under the GPLv2 or any later version.

SPDX-License-Identifier: GPL-2.0-or-later

=cut
"""
import os
from re import sub
import sys

from MoinMoin import wikiutil
from MoinMoin.request import RequestCLI

# the wikifarm config directory is hardcoded (see IMPLEMENTATION NOTES)
os.chdir('/export/wiki/config')
sys.path.insert(0, '/export/wiki/config')

from farmconfig import wikis  # noqa: E402
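

# The two helpers below are kept from MoinMoin/wikimacro.py's SystemInfo macro
# (see IMPLEMENTATION NOTES above); the plugin itself does not appear to call them.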
def _formatInReadableUnits(size):
    size = float(size)
    unit = u' Byte'
    if size > 9999:
        unit = u' KiB'
        size /= 1024
    if size > 9999:
        unit = u' MiB'
        size /= 1024
    if size > 9999:
        unit = u' GiB'
        size /= 1024
    return u"%.1f%s" % (size, unit)


def _getDirectorySize(path):
    try:
        dirsize = 0
        for root, dirs, files in os.walk(path):
            dirsize += sum([os.path.getsize(os.path.join(root, name)) for name in files])
    except EnvironmentError:
        dirsize = -1
    return dirsize


def main():
    for wiki in wikis:
        name = wiki[0]
        url = wiki[1]
        # XXX, hack: transform the regexp into a canonical url
        # we need canonical urls in the config for this to be clean
        # look for (foo|bar) and replace with foo
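        # for illustration (hypothetical pattern): an entry such as
        # r"^(wiki.example.org|www.example.org)/.*$" is reduced by the two
        # substitutions below to the plain url "wiki.example.org/"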
        url = sub(r'\(([^\|]*)(\|[^\)]*\))+', r'\1', url)
        # strip common regexp metacharacters (anchors and ".*") so that what
        # remains is a plain url
        url = sub(r'[\^\$]|(\.\*)', '', url)
        request = RequestCLI(url)
        # user='' disables ACL checks, so ACL-protected pages are counted too
        pagelist = request.rootpage.getPageList(user='')
        systemPages = [page for page in pagelist if wikiutil.isSystemPage(request, page)]
        # emit one munin value line per wiki, e.g. "mywiki.value 42"
        print(name + '.value ' + str(len(pagelist) - len(systemPages)))


def config():
    print("""graph_title Wiki size
graph_vlabel Number of pages
graph_args --base 1000 -l 0
graph_scale no
graph_category wiki
graph_info The number of pages excludes system pages but includes ACL-protected pages.""")
    for wiki in wikis:
        name = wiki[0]
        # each wiki's own config module (named after the wiki) carries its sitename
        mod = getattr(__import__(name), 'Config')
        print(name + '.label ' + getattr(mod, 'sitename'))
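
# For reference, a hypothetical "config" run for a farm with a single wiki
# named "mywiki" (whose Config.sitename is "My Wiki") would print the
# graph_* block above followed by:
#
#   mywiki.label My Wiki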


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == 'config':
        config()
    else:
        main()