Created
April 6, 2015 19:04
-
-
Save priyankajayaswal1/1ff6d72afc12fa5156ae to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
""" | |
This bot uses external filtering programs for munging text. | |
For example: | |
python pwb.py piper.py -filter:'tr A-Z a-z' Wikipedia:Sandbox | |
Would lower case the article with tr(1). | |
Multiple -filter commands can be specified:
python pwb.py piper.py -filter:cat -filter:'tr A-Z a-z' -filter:'tr a-z A-Z' Wikipedia:Sandbox | |
Would pipe the article text through cat(1) (NOOP) and then lower case | |
it with tr(1) and upper case it again with tr(1) | |
The following parameters are supported: | |
¶ms; | |
-always Always commit changes without asking you to accept them | |
-filter: Filter the article text through this program, can be | |
given multiple times to filter through multiple programs in | |
the order which they are given | |
""" | |
# | |
# (C) Pywikibot team, 2008-2015 | |
# | |
# Distributed under the terms of the MIT license. | |
# | |
__version__ = '$Id: c00c7b320006ee2eae5f18b0024442e8fc1ced9e $' | |
# | |
import os | |
import pipes | |
import tempfile | |
import pywikibot | |
from pywikibot import i18n, pagegenerators | |
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot | |
# Needed for the help text printed when this script is started with -help:
# maps the placeholder used in the module docstring to the shared
# page-generator parameter help.
# NOTE(review): the key must stay identical to the placeholder text in the
# module docstring, otherwise nothing is substituted.
docuReplacements = {'¶ms;': pagegenerators.parameterHelp}
class PiperBot(ExistingPageBot, NoRedirectPageBot):

    """Bot for munging text using external filtering programs."""

    def __init__(self, generator, **kwargs):
        """
        Constructor.

        Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * always    - If True, don't prompt for changes.
            * filters   - List of shell commands the page text is piped
                          through, in the given order.
            * comment   - Edit summary used when saving a changed page.
        """
        self.availableOptions.update({
            'always': False,
            'filters': [],
            'comment': '',
        })
        # Forward the keyword options to the base class; passing only the
        # generator would drop every option supplied by the caller.
        super(PiperBot, self).__init__(generator=generator, **kwargs)

    def pipe(self, program, text):
        """
        Pipe a given text through a given program and return the result.

        Parameters:
            * program - Shell command to run; it reads the text on stdin
                        and writes the result to stdout. Must be ASCII.
            * text    - Unicode text to feed to the program.

        Returns the program's output decoded from UTF-8.
        """
        encoded = text.encode('utf-8')
        template = pipes.Template()
        template.append(program.encode('ascii'), '--')

        # mkstemp() creates the file itself, which avoids the race between
        # choosing a name and creating the file that mktemp() has.
        handle, temp_filename = tempfile.mkstemp(suffix='.txt')
        os.close(handle)
        try:
            # The pipeline's stdout is redirected into the temporary file.
            with template.open(temp_filename, 'w') as sink:
                sink.write(encoded)
            # Now retrieve the munged text
            with open(temp_filename, 'r') as source:
                munged = source.read()
        finally:
            # Clean up even when the filter or the read failed.
            os.unlink(temp_filename)
        return munged.decode('utf-8')

    def treat(self, page):
        """Load the given page, pipe it through the filters and save it."""
        # put_current() and the messages below work on self.current_page,
        # which nothing else in this class assigns.
        # NOTE(review): assumes the base class does not bind current_page
        # before calling treat() - confirm.
        self.current_page = page
        try:
            # Load the page
            original_text = page.get()
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
            return

        # Munge!
        text = original_text
        for program in self.getOption('filters'):
            text = self.pipe(program, text)

        # only save if something was changed
        if text == original_text:
            return

        save_kwargs = {}
        comment = self.getOption('comment')
        if comment:
            # NOTE(review): assumes put_current() forwards extra keyword
            # arguments down to Page.save(summary=...) - confirm.
            save_kwargs['summary'] = comment
        try:
            self.put_current(new_text=text,
                             ignore_save_related_errors=True,
                             **save_kwargs)
        except pywikibot.LockedPage:
            pywikibot.output("Page %s is locked?!"
                             % self.current_page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output("An edit conflict has occured at %s."
                             % self.current_page.title(asLink=True))
def main(*args):
    """
    Parse the command line arguments and run the bot.

    Parameters:
        * args - Command line arguments, handed to pywikibot.handle_args().

    Recognised local arguments are -filter:<program> (repeatable) and
    -always. Everything else is offered to the page generator factory;
    arguments it rejects are joined into a single page title. If no pages
    result, the help text is shown instead.
    """
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []
    # The programs to pipe stuff through
    filters = []
    options = {}

    # Parse command line arguments
    for arg in pywikibot.handle_args(args):
        if arg.startswith("-filter:"):
            filters.append(arg[len("-filter:"):])
        elif arg.startswith("-always"):
            options['always'] = True
        elif not genFactory.handleArg(arg):
            # Not a standard argument like -start:XYZ or -ref:Asdf,
            # so treat it as part of a page title.
            pageTitleParts.append(arg)

    # Always set the key, so later code never hits a KeyError when no
    # -filter: argument was given.
    options['filters'] = filters

    if pageTitleParts:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = pywikibot.Page(pywikibot.Site(), pageTitle)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        pywikibot.showHelp()
        return

    # Build the edit summary for every run, not only single-page runs.
    # NOTE(review): assumes the translated message contains one positional
    # %s placeholder - confirm against the i18n message.
    options['comment'] = i18n.twtranslate(
        pywikibot.Site().lang, 'piper-edit-summary') % (', '.join(filters))

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    bot = PiperBot(gen, **options)
    bot.run()
# Script entry point: start the bot only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment