Created
March 19, 2015 20:31
-
-
Save cgt/9df8afdbfa1c7ab2f348 to your computer and use it in GitHub Desktop.
Church infobox URL length report generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Report the length of the ``Websted`` value in ``Kirke`` infobox templates.

For every page delivered by the page generators, each ``Kirke`` template that
has a ``Websted`` parameter produces one output line of the form
``<page title>:<length of value>:<value>``. This script only reads pages; it
never saves anything.

The following parameters are supported:

&params;
"""
#
# (C) Pywikibot team, 2006-2014
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: 33d6319ef50b20b70e4eaccc851ab4ddb43053a8 $'
#

import pywikibot
from pywikibot import pagegenerators
from pywikibot import i18n
import mwparserfromhell

# This is required for the text that is shown when you run this script
# with the parameter -help. The key must be spelled exactly like the
# placeholder in the module docstring above.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
class BasicBot:

    """Read-only bot that reports the ``Websted`` value of ``Kirke`` templates.

    For each page from the generator, every ``Kirke`` template carrying a
    ``Websted`` parameter yields one output line
    ``<page title>:<length of value>:<value>``.
    """

    def __init__(self, generator):
        """
        Constructor.

        @param generator: The page generator that determines on which pages
            to work.
        @type generator: generator.
        """
        self.generator = generator
        # Site for the configured wiki; none of the methods below read it.
        self.site = pywikibot.Site()

    def run(self):
        """Process each page from the generator."""
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """Print one report line per ``Kirke`` template that has ``Websted``.

        Pages whose text could not be loaded, or is empty, are skipped
        silently (``load`` has already reported the reason, if any).

        @param page: The page to inspect.
        """
        text = self.load(page)
        if not text:
            return
        wikicode = mwparserfromhell.parse(text)
        for tmpl in wikicode.filter_templates():
            # matches() ignores surrounding whitespace and the case of the
            # first letter. A plain comparison against u"Kirke" fails for the
            # common multi-line layout, where the parsed name keeps its
            # trailing newline.
            if not tmpl.name.matches(u"Kirke"):
                continue
            if not tmpl.has(u"Websted"):
                continue
            # Strip once; the same stripped text is both measured and shown.
            url = tmpl.get(u"Websted").value.strip()
            pywikibot.output(
                u"{}:{}:{}".format(page.title(), len(url), url),
                toStdout=True)

    def load(self, page):
        """Load the text of the given page.

        @param page: The page whose text is wanted.
        @return: The page text, or None if the page does not exist or is a
            redirect (a message is printed in both cases).
        """
        try:
            return page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        return None
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Global options (site selection etc.) are consumed here; what is left
    # over is specific to this script.
    remaining = pywikibot.handle_args(args)

    # Collects the page-selection options shared with other scripts.
    factory = pagegenerators.GeneratorFactory()
    for option in remaining:
        # -dry is accepted and consumed, but nothing in this function reads
        # it afterwards, so it is not handed to the factory either.
        if option.startswith("-dry"):
            continue
        factory.handleArg(option)

    pages = factory.getCombinedGenerator()
    if not pages:
        # No page source given on the command line: show usage instead.
        pywikibot.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    BasicBot(pagegenerators.PreloadingGenerator(pages)).run()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment