Skip to content

Instantly share code, notes, and snippets.

@priyankajayaswal1
Created April 6, 2015 19:04
Show Gist options
  • Save priyankajayaswal1/1ff6d72afc12fa5156ae to your computer and use it in GitHub Desktop.
Save priyankajayaswal1/1ff6d72afc12fa5156ae to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot uses external filtering programs for munging text.
For example:
python pwb.py piper.py -filter:'tr A-Z a-z' Wikipedia:Sandbox
Would lower case the article with tr(1).
Multiple -filter commands can be specified:
python pwb.py piper.py -filter:cat -filter:'tr A-Z a-z' -filter:'tr a-z A-Z' Wikipedia:Sandbox
Would pipe the article text through cat(1) (NOOP) and then lower case
it with tr(1) and upper case it again with tr(1)
The following parameters are supported:
&params;
-always Always commit changes without asking you to accept them
-filter: Filter the article text through this program, can be
given multiple times to filter through multiple programs in
the order which they are given
"""
#
# (C) Pywikibot team, 2008-2015
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: c00c7b320006ee2eae5f18b0024442e8fc1ced9e $'
#
import os
import pipes
import tempfile
import pywikibot
from pywikibot import i18n, pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
# Placeholder substitutions for the help text printed when this script is
# run with -help: '&params;' in the module docstring stands for the
# parameter help provided by pagegenerators.
docuReplacements = {'&params;': pagegenerators.parameterHelp}
class PiperBot(ExistingPageBot, NoRedirectPageBot):

    """Bot for munging text using external filtering programs."""

    def __init__(self, generator, **kwargs):
        """
        Constructor.

        Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * always    - If True, don't prompt for changes
            * filters   - Shell commands the page text is piped through,
                          in the order given
            * comment   - Edit summary prepared by the caller; stored as an
                          option, not read by this class itself
        """
        self.availableOptions.update({
            'always': False,
            'filters': [],
            'comment': '',
        })
        # main() hands the parsed options over as keyword arguments; they
        # must be forwarded to the base class, otherwise they are discarded
        # and getOption() can only ever see the defaults registered above.
        super(PiperBot, self).__init__(generator=generator, **kwargs)

    def pipe(self, program, text):
        """
        Pipe a given text through a given program and return the result.

        Parameters:
            * program - The shell command to run; must be ASCII-encodable.
            * text    - The text to send to the command; it is written to
                        the command as UTF-8.

        Returns the command's output decoded from UTF-8.
        """
        text = text.encode('utf-8')

        template = pipes.Template()
        # '--' declares a step that reads stdin and writes stdout.
        template.append(program.encode('ascii'), '--')

        # mkstemp() creates the file atomically, unlike the race-prone
        # mktemp(). Only the name is needed here because the pipeline
        # redirects its output into that file, so the descriptor is closed
        # right away.
        handle, temp_filename = tempfile.mkstemp(suffix='.txt')
        os.close(handle)
        try:
            with template.open(temp_filename, 'w') as sink:
                sink.write(text)

            # Now retrieve the munged text
            with open(temp_filename, 'rb') as source:
                munged_text = source.read()
        finally:
            # Clean up even when the filter or the read fails.
            os.unlink(temp_filename)

        return munged_text.decode('utf-8')

    def treat(self, page):
        """
        Pipe the page text through all filters and save it if it changed.

        Parameters:
            * page - The page to work on; redirects are skipped.
        """
        # put_current() and the error handlers below refer to
        # self.current_page, which nothing else in this class sets because
        # treat() is overridden here, so record the page being processed.
        # NOTE(review): confirm the base class does not expect to set this
        # itself before treat() is called.
        self.current_page = page

        try:
            # Load the page
            original_text = page.get()
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
            return

        # Munge!
        text = original_text
        for program in self.getOption('filters'):
            text = self.pipe(program, text)

        # only save if something was changed
        if text == original_text:
            return

        # NOTE(review): the 'comment' option is not passed on to
        # put_current(); confirm how the edit summary should be supplied.
        try:
            self.put_current(new_text=text, ignore_save_related_errors=True)
        except pywikibot.LockedPage:
            pywikibot.output("Page %s is locked?!"
                             % self.current_page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output("An edit conflict has occured at %s."
                             % self.current_page.title(asLink=True))
def main(*args):
    """
    Parse the command line arguments and run the bot.

    Recognized here are -filter:<program> (may be given several times) and
    -always. Every other argument is first offered to the page generator
    factory; anything it does not handle is treated as part of a single
    page title. If no page or generator results, the help text is shown.

    Parameters:
        * args - The command line arguments, passed on to
                 pywikibot.handle_args.
    """
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    gen_factory = pagegenerators.GeneratorFactory()
    # Collects the words of the page title if one single page to work on
    # is specified by the arguments.
    page_title_parts = []
    # The programs to pipe stuff through. The list is registered in the
    # options up front so that it exists even when no -filter: was given.
    filters = []
    options = {'filters': filters}

    # Parse command line arguments
    for arg in pywikibot.handle_args(args):
        if arg.startswith("-filter:"):
            filters.append(arg[len("-filter:"):])
        elif arg.startswith("-always"):
            options['always'] = True
        elif not gen_factory.handleArg(arg):
            # Not a standard argument like -start:XYZ or -ref:Asdf,
            # so it belongs to the page title.
            page_title_parts.append(arg)

    if page_title_parts:
        # We will only work on a single page.
        page_title = ' '.join(page_title_parts)
        gen = iter([pywikibot.Page(pywikibot.Site(), page_title)])
    else:
        gen = gen_factory.getCombinedGenerator()

    if not gen:
        pywikibot.showHelp()
        return

    # Build the edit summary only once it is known there is work to do;
    # it is joined from the local list, so it cannot fail on a missing key.
    options['comment'] = i18n.twtranslate(pywikibot.Site().lang, 'piper-edit-summary') % (', '.join(filters))

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    bot = PiperBot(gen, **options)
    bot.run()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment