Skip to content

Instantly share code, notes, and snippets.

@whym
Last active June 27, 2016 23:50
Show Gist options
  • Save whym/4af2d44d586942ab03f51800b0398425 to your computer and use it in GitHub Desktop.
Save whym/4af2d44d586942ab03f51800b0398425 to your computer and use it in GitHub Desktop.
Add interwiki links to pages with no interwiki links
#! /usr/bin/env python2
# -*- coding: utf-8 -*-
import os
import sys
import oursql
import pandas as pd
sys.path.append(os.environ['PWB'])
from scripts import interwiki
import pywikibot
def mygen():
    """Yield a ``pywikibot.Page`` for every title returned by the DB query.

    Connects to the ``enwiktionary_p`` database on ``enwiktionary.labsdb``
    using the credentials in ``~/.my.cnf``, runs the selection query below,
    and yields one page object per result row, built from the row's first
    column (``page_title``).

    The whole result set is fetched up front and the cursor and connection
    are closed before the first page is yielded, so no database handle stays
    open while the consumer works through the pages.
    """
    conn = oursql.connect(host='enwiktionary.labsdb',
                          read_default_file=os.path.expanduser('~/.my.cnf'),
                          db='enwiktionary_p',
                          charset=None,
                          use_unicode=False)
    try:
        cursor = conn.cursor()
        try:
            # NOTE(review): because of the LEFT JOIN, ``HAVING COUNT(*) <= 1``
            # keeps pages with zero langlinks AND pages with exactly one
            # langlink -- confirm that including the latter is intended.
            cursor.execute('''
SELECT page_title, ll_lang, ll_title
FROM page
LEFT JOIN langlinks ON page_id = ll_from
WHERE page_namespace = 0
AND NOT page_is_redirect
AND NOT EXISTS(SELECT * FROM revision_userindex WHERE rev_page = page_id AND rev_user_text = "Whymbot")
GROUP BY page_id
HAVING COUNT(*) <= 1
ORDER BY RAND()
LIMIT 100000
''')
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    site = pywikibot.Site()
    for row in rows:
        # The connection is opened with use_unicode=False, so the title is
        # decoded from UTF-8 here before being handed to pywikibot.
        yield pywikibot.Page(site, row[0].decode('utf-8'))
def mygen2():
    """Yield a ``pywikibot.Page`` for every ``page_title`` in a Quarry CSV.

    Downloads the CSV output of a fixed Quarry run and yields one page
    object per value in its ``page_title`` column.
    """
    site = pywikibot.Site()
    # Read titles strictly as strings: with pandas' default NA handling a
    # title such as "NA", "null" or "nan" is parsed as a float NaN, and
    # numeric-looking titles may be parsed as numbers; either would make
    # the .decode() call below fail.
    df = pd.read_csv('https://quarry.wmflabs.org/run/95062/output/0/csv?download=true',
                     dtype={'page_title': str},
                     keep_default_na=False)
    for t in df.page_title:
        yield pywikibot.Page(site, t.decode('utf-8'))
if __name__ == '__main__':
    # Script entry point: configure pywikibot and the interwiki script from
    # a fixed option list, then run the interwiki bot over the pages
    # produced by mygen().
    home_dir = os.environ['HOME']
    cli_options = [
        '-dir:%s/.pywikibot' % home_dir,
        '-lang:en',
        '-family:wiktionary',
        '-user:Whymbot',
        '-wiktionary',
        '-autonomous',
        '-async',
        '-query:500',
        # '-simulate',  # left disabled, as in the original option list
        '-putthrottle:60',
    ]

    # Whatever pywikibot.handle_args() returns is passed, one option at a
    # time, to the interwiki script's own option reader.
    leftover_options = pywikibot.handle_args(cli_options)
    for option in leftover_options:
        interwiki.globalvar.readOptions(option)

    robot = interwiki.InterwikiBot()
    robot.setPageGenerator(iter(mygen()))
    robot.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment