Last active
November 26, 2017 15:08
-
-
Save Ladsgroup/4b673975c7515f5022476493ae02636b to your computer and use it in GitHub Desktop.
Cochrane bot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# License: MIT
import re
import urllib2

import pywikibot
from pywikibot import pagegenerators
# Wiki the bot works on: language code 'en'.
# NOTE(review): the wiki family is presumably taken from the user's pywikibot
# configuration -- confirm it is set to Wikipedia.
site = pywikibot.Site('en')

# Search namespace 0 for pages whose wikitext source has a ``journal``
# citation parameter mentioning "Cochrane".  The query embeds a regular
# expression, so it is written as a raw string to keep ``\|`` literal.
generator = pagegenerators.SearchPageGenerator(
    r'insource:/\| *journal *= *.+Cochrane/',
    site=site,
    namespaces=[0],
)

# Wrap the search results in a preloading generator before iterating.
gen = pagegenerators.PreloadingGenerator(generator)
def update_report(page, old_pmid, new_pmid):
    """Add an entry about an outdated review to the on-wiki report page.

    The entry names the article, links to its edit form, and links both the
    currently cited review and its newer version on PubMed.  If an identical
    entry is already present on the report page, nothing is saved.

    :param page: the article page; only ``page.title()`` is read.
    :param old_pmid: PMID of the review currently cited in the article.
    :param new_pmid: PMID of the review that updates it.
    """
    report_page = pywikibot.Page(site, 'Wikipedia:WikiProject Medicine/Cochrane update/August 2017')
    existing = report_page.get()

    article = page.title()
    entry = u'\n*Article [[%s]] ([{{fullurl:%s|action=edit}} edit]) old review [https://www.ncbi.nlm.nih.gov/pubmed/%s PMID:%s] new review [https://www.ncbi.nlm.nih.gov/pubmed/%s PMID:%s]' % (
        article, article, old_pmid, old_pmid, new_pmid, new_pmid)

    if entry not in existing:
        # The trailing five tildes are wiki markup appended after the entry
        # (MediaWiki expands them to a timestamp when the page is saved).
        report_page.text = existing + entry + u' - ~~~~~'
        report_page.save('Bot: Update report')
# Matches a ``|pmid=<digits>|`` citation parameter and captures the digits.
_PMID_PARAM_RE = re.compile(r'\|\s*?pmid\s*?\=\s*?(\d+?)\s*?\|')
# Matches the "Update in" box of a PubMed abstract page and captures the PMID
# of the newer review it links to.
_UPDATE_IN_RE = re.compile(
    r'<h3>Update in</h3><ul><li class="comments"><a href="/pubmed/(\d+?)"')

# For every candidate article: look up each cited PMID on PubMed and, when
# PubMed lists a newer version of the review, tag the citation with
# {{Update inline}} and record the finding on the report page.
# NOTE(review): the nesting below was reconstructed from a listing that had
# lost its indentation -- confirm that saving once per page (after all PMIDs
# have been processed) matches the intended behaviour.
for page in gen:
    try:
        text = page.get()
    except Exception:
        # Best effort: skip pages whose text cannot be fetched.  Catching
        # Exception (not a bare except) lets Ctrl-C still stop the bot.
        continue
    if '<!-- No update needed -->' in text:
        # Editors opt a page out of the check with this marker comment.
        continue

    pmids = _PMID_PARAM_RE.findall(text)
    # Progress output; print() with one argument works on Python 2 and 3.
    print(len(pmids))

    for pmid in pmids:
        try:
            response = urllib2.urlopen('https://www.ncbi.nlm.nih.gov/pubmed/%s' % pmid)
            try:
                res = response.read().decode('utf-8')
            finally:
                # Always release the connection, even if reading/decoding fails.
                response.close()
        except Exception:
            # Network or decoding problem: skip this PMID, keep going.
            continue
        if 'WITHDRAWN' in res:
            continue

        update_match = _UPDATE_IN_RE.search(res)
        if not update_match:
            continue
        pm = update_match.group(1)

        # The template is left unclosed here so the membership test below also
        # matches tags whose reason text continues after the PMID.
        up = u'{{Update inline|reason=Updated version https://www.ncbi.nlm.nih.gov/pubmed/' + pm
        if up in text:
            continue

        # Insert the tag right after the </ref> that closes the citation
        # containing this PMID.  DOTALL must be passed as ``flags=``: the
        # fourth positional argument of re.sub() is ``count``, so passing it
        # positionally left the flag off and multi-line citations unmatched.
        # A function replacement avoids backslash interpretation in ``up``.
        citation_pattern = r'(\|\s*?pmid\s*?\=\s*?%s\s*?(?:\||\}\}).*?\< *?\/ *?ref *?\>)' % pmid
        text = re.sub(
            citation_pattern,
            lambda m: m.group(1) + up + u'}}',
            text,
            flags=re.DOTALL,
        )
        update_report(page, pmid, pm)

    if text != page.text:
        page.text = text
        page.save(u'Bot: Adding "update inline" template')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment