Skip to content

Instantly share code, notes, and snippets.

@whym
Created July 28, 2013 05:51
Show Gist options
  • Select an option

  • Save whym/6097573 to your computer and use it in GitHub Desktop.

Select an option

Save whym/6097573 to your computer and use it in GitHub Desktop.
def format_isodate(x):
    """Format a date/time value as ``YYYY-MM-DDTHH:MM:SSZ``.

    The trailing ``Z`` denotes UTC, so timezone-aware values are converted
    to UTC before formatting.  Naive values are formatted unchanged
    (presumably they are already in UTC -- confirm against callers).

    :param x: a ``datetime`` (or ``date``) object.
    :returns: the formatted timestamp string.
    """
    # ``date`` objects have no ``utcoffset``; naive datetimes return None.
    utcoffset = getattr(x, 'utcoffset', None)
    offset = utcoffset() if utcoffset is not None else None
    if offset is not None:
        # Shift the wall-clock time to UTC and drop the tzinfo so the
        # literal 'Z' suffix below is truthful.
        x = (x - offset).replace(tzinfo=None)
    return x.strftime('%Y-%m-%dT%H:%M:%SZ')
def parse_wikidate(x):
    """Parse a 14-digit ``YYYYMMDDHHMMSS`` timestamp into a naive datetime.

    :param x: the timestamp as text; byte strings are accepted too and are
        decoded as ASCII first.
    :returns: a ``datetime`` with no tzinfo attached.
    :raises ValueError: if *x* does not match the ``%Y%m%d%H%M%S`` layout.
    """
    from datetime import datetime
    # On Python 2 ``bytes is str`` so this branch is skipped and behaviour
    # is unchanged; on Python 3 ``strptime`` rejects bytes, so decode them.
    if isinstance(x, bytes) and not isinstance(x, str):
        x = x.decode('ascii')
    return datetime.strptime(x, '%Y%m%d%H%M%S')
def connect(db):
    """Open an ``oursql`` connection to the user database named *db*.

    The host name is built from *db* with underscores turned into hyphens
    plus the ``.userdb.toolserver.org`` suffix; the schema name is the same
    string with hyphens turned into underscores.  Credentials are read
    from ``~/.my.cnf``.

    :param db: database name, written with either ``_`` or ``-``.
    :returns: the connection object returned by ``oursql.connect``.
    """
    import os
    import oursql

    hyphenated = db.replace('_', '-')
    schema = hyphenated.replace('-', '_')
    defaults_file = os.path.expanduser('~/.my.cnf')
    # charset=None / use_unicode=False: the driver is asked not to decode
    # result strings.
    return oursql.connect(
        host=hyphenated + '.userdb.toolserver.org',
        read_default_file=defaults_file,
        db=schema,
        charset=None,
        use_unicode=False,
    )
def render(text, script):
    """Send wikitext to an API endpoint with ``action=parse`` and return HTML.

    The request is a POST of ``format=json&action=parse&text=<quoted text>``
    to *script*; the reply is decoded as UTF-8 JSON and the value stored at
    ``['parse']['text']['*']`` is returned.

    :param text: wikitext to render (text or UTF-8 encoded bytes).
    :param script: URL of the API endpoint.
    :returns: the rendered markup taken from the JSON reply.
    :raises SystemExit: with status 1 after printing the error to stderr
        when the request fails with ``URLError``.
    :raises KeyError: if the reply lacks the ``parse``/``text``/``*`` path.
    """
    import json
    import sys
    from contextlib import closing
    try:  # Python 2
        from urllib2 import Request, URLError, quote, urlopen
    except ImportError:  # Python 3
        from urllib.error import URLError
        from urllib.parse import quote
        from urllib.request import Request, urlopen

    # Quote bytes rather than text so non-ASCII wikitext is percent-encoded
    # as UTF-8 on both Python versions.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    data = 'format=json&action=parse&text=%s' % quote(text)
    # The quoted payload is pure ASCII; Python 3 requires POST data as bytes.
    if not isinstance(data, bytes):
        data = data.encode('ascii')

    request = Request(
        script,
        data=data,
        headers={'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'})
    try:
        # closing() releases the response even if read() fails.
        with closing(urlopen(request)) as response:
            res = response.read()
    except URLError as e:
        sys.stderr.write('%s\n' % (e,))
        sys.exit(1)
    return json.loads(res.decode('utf-8'))['parse']['text']['*']
def render_html(site, wikitext, postprocess=lambda x: x, title='(auto generated HTML)'):
    """Render wikitext through *site*'s API and wrap it in a full HTML page.

    :param site: host name of the wiki; its first dot-separated label is
        used as the page language and it is also used to build the API URL
        ``http://<site>/w/api.php``.
    :param wikitext: iterable of wikitext lines; they are joined with
        newlines before being rendered.
    :param postprocess: callable applied to the rendered body (after link
        rewriting) before it is placed in the page.
    :param title: text placed verbatim inside ``<title>``.
        NOTE(review): it is not HTML-escaped -- confirm callers pass safe
        values.
    :returns: header, processed body and footer joined into one document.
    """
    lang = site.split('.')[0]
    # The placeholders are filled exactly once.  Formatting the result a
    # second time would raise as soon as site or title contained a '%'.
    header = '''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="%(lang)s" dir="ltr" class="client-nojs" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(title)s</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<link rel="copyright" href="//creativecommons.org/licenses/by-sa/3.0/" />
<link rel="stylesheet" href="//bits.wikimedia.org/%(site)s/load.php?debug=false&amp;lang=%(lang)s&amp;modules=site&amp;only=styles&amp;skin=vector&amp;*" type="text/css" media="all" />
<style type="text/css" media="all">a:lang(ar),a:lang(ckb),a:lang(fa),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}a.new,#quickbar a.new{color:#ba0000}
</style>
<script src="//bits.wikimedia.org/%(site)s/load.php?debug=false&amp;lang=%(lang)s&amp;modules=startup&amp;only=scripts&amp;skin=vector&amp;*" type="text/javascript"></script>
<script type="text/javascript" src="//toolserver.org/~whym/jquery/jquery-latest.js"></script>
<script type="text/javascript" src="//toolserver.org/~whym/jquery/jquery.tablesorter.js"></script>
<script type="text/javascript">
$(document).ready(function()
{
$(".sortable").tablesorter();
}
);
</script>
</head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject skin-vector action-view">
''' % {'site': site, 'lang': lang, 'title': title}
    footer = '''
<!-- /footer -->
<script type="text/javascript">if(window.mw){
mw.loader.load(["mediawiki.user","mediawiki.page.ready","mediawiki.legacy.mwsuggest","ext.vector.collapsibleNav","ext.vector.collapsibleTabs","ext.vector.editWarning","ext.vector.simpleSearch"], null, true);
}</script>
</body>
</html>
'''
    script = 'http://%s/w/api.php' % site
    body = render("\n".join(wikitext), script).encode('utf-8')
    # Turn site-relative links into protocol-relative links on the wiki
    # host so they still resolve when the page is served from elsewhere.
    body = body.replace('="/wiki/', '="//' + site + '/wiki/')
    body = body.replace('="/w/', '="//' + site + '/w/')
    return """
%s
%s
%s
""" % (header, postprocess(body), footer)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment