#!/usr/bin/env python3
"""
=head1 NAME

moinmoin_pages - lists the number of pages in all wikis of a MoinMoin wikifarm

ACL-protected pages are included.

=head1 CONFIGURATION

  [moinmoin_*]
  user www

=head1 IMPLEMENTATION NOTES

The plugin is quite Koumbit-specific:

=over 4

=item 1. the wikifarm configuration directory (/export/wiki/config) is hardcoded

=item 2. "wikilist.py" is assumed to contain the list of wiki -> URL patterns (an example entry is sketched below)

=item 3. URL patterns are assumed to be simple enough that they can be decoded back into a URL

=back
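
An entry in that list is expected to look roughly like the following (hypothetical wiki
name and pattern, not taken from the actual Koumbit configuration; the dots are left
unescaped so that the decoding hack in main() yields a clean URL):

  wikis = [
      # (wiki name, URL regexp matched against incoming requests)
      ("examplewiki", r"^examplewiki.example.org/.*$"),
  ]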

Also note that this plugin reuses code from MoinMoin/wikimacro.py's SystemInfo macro.

Finally, I tried using XML-RPC instead of the native functions to fetch the data, but it ended up
being slower. For the record, here is what the XML-RPC getAllPages() call would have looked like:

  xmlrpclib.ServerProxy("http://wiki.koumbit.net/?action=xmlrpc2").getAllPages()

The quick benchmark I did yielded these results for the XML-RPC getAllPages() vs. the native
getPageList() call:

  xmlrpc: 2.35 real  0.12 user  0.04 sys
  native: 1.44 real  1.07 user  0.35 sys

So the native approach spends more time on the CPU (all of its time, actually), but it gets the
job done faster. It is quite possible that the CPU time saved on the XML-RPC client side is
simply spent by the server instead.
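
For completeness, here is a rough, untested sketch of how the XML-RPC variant could have
produced the same kind of output (minus the system-page filtering done by the native code):

  import xmlrpclib
  server = xmlrpclib.ServerProxy("http://wiki.koumbit.net/?action=xmlrpc2")
  print("wiki.value " + str(len(server.getAllPages())))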

=head1 AUTHORS

Copyleft 2007, The Anarcat <anarcat@koumbit.org>

=head1 LICENSE

Licensed under the GPLv2 or any later version.

SPDX-License-Identifier: GPL-2.0-or-later

=cut
"""

import os
from re import sub
import sys

from MoinMoin import wikiutil
from MoinMoin.request import RequestCLI

# the wikifarm configuration directory is hardcoded; chdir into it and put it
# on sys.path so that farmconfig and the per-wiki config modules can be imported
os.chdir('/export/wiki/config')
sys.path.insert(0, '/export/wiki/config')

from farmconfig import wikis  # noqa: E402


def _formatInReadableUnits(size):
    # helper carried over from MoinMoin's SystemInfo macro; not used by this plugin
    size = float(size)
    unit = u' Byte'
    if size > 9999:
        unit = u' KiB'
        size /= 1024
    if size > 9999:
        unit = u' MiB'
        size /= 1024
    if size > 9999:
        unit = u' GiB'
        size /= 1024
    # the unit strings already carry a leading space, so no separator is needed here
    return u"%.1f%s" % (size, unit)


def _getDirectorySize(path):
    # likewise carried over from the SystemInfo macro and unused here
    try:
        dirsize = 0
        for root, dirs, files in os.walk(path):
            dirsize += sum([os.path.getsize(os.path.join(root, name)) for name in files])
    except EnvironmentError:
        dirsize = -1
    return dirsize


def main():
    for wiki in wikis:
        name = wiki[0]
        url = wiki[1]
        # XXX, hack: transform the regexp into a canonical URL
        # (we would need canonical URLs in the config for this to be clean)
        # look for (foo|bar) and replace it with foo
        url = sub(r'\(([^\|]*)(\|[^\)]*\))+', r'\1', url)
        # strip the common regexp anchors and wildcards (^, $, .*) to end up with a plain URL
        url = sub(r'[\^\$]|(\.\*)', '', url)
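        # For example, a hypothetical pattern like r'^(wiki|www).example.org/.*$'
        # (not taken from the real config) comes out of the two substitutions
        # above as 'wiki.example.org/'.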

        request = RequestCLI(url)
        pagelist = request.rootpage.getPageList(user='')

        # report only non-system pages; ACL-protected pages remain included
        systemPages = [page for page in pagelist if wikiutil.isSystemPage(request, page)]
        print(name + '.value ' + str(len(pagelist) - len(systemPages)))


def config():
    print("""graph_title Wiki size
graph_vlabel Number of pages
graph_args --base 1000 -l 0
graph_scale no
graph_category wiki
graph_info The number of pages excludes system pages but includes ACL-protected pages.""")
    for wiki in wikis:
        name = wiki[0]
        # import the per-wiki config module (found in /export/wiki/config) to
        # read its human-readable site name for the graph label
        mod = getattr(__import__(name), 'Config')
        print(name + '.label ' + getattr(mod, 'sitename'))


if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == 'config':
        config()
    else:
        main()
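
# A rough sketch of how this plugin is invoked and what the output looks like
# (hypothetical wiki name and page count, not real data):
#
#   $ ./moinmoin_pages config
#   graph_title Wiki size
#   graph_vlabel Number of pages
#   graph_args --base 1000 -l 0
#   graph_scale no
#   graph_category wiki
#   graph_info The number of pages excludes system pages but includes ACL-protected pages.
#   examplewiki.label Example Wiki
#
#   $ ./moinmoin_pages
#   examplewiki.value 42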